diff --git "a/results.json" "b/results.json" --- "a/results.json" +++ "b/results.json" @@ -1,149998 +1,3 @@ -[ - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.1920266076 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.4391810036 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.1921503062 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.4121866696 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.0706883182 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.2475181808 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.1707669328 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.1135444374 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.294888625 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.0732341445 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.1712917218 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.1518475015 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.4409012481 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.2466291513 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.4728505876 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.2294045445 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.4952691669 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.3288016428 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.5463705477 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.0629352571 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.3681064162 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.0907131082 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.345867578 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.0515032587 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.3472277664 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.1851832776 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.4094860171 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.178673865 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.421571814 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.0893532438 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.322658401 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.2155508388 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.4627157806 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.2362014825 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.4311272979 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.1108973081 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.3531592039 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.1247431931 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.3827364935 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.0852543641 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.3968535114 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.1568859483 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.3854296145 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.1672670776 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.4344446004 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.1673485041 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.3498742372 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.2027365964 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.4429221375 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.2592014549 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.4502390132 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.245009056 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.4752285329 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.1249288306 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.4378159282 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.0418701115 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.227562406 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.0126447021 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.2329401033 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.1929725876 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.4742897968 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.2142339765 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.4337679078 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.2486422994 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.5046441311 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.390442231 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.6258445826 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.2411522305 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.4648713205 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.2526136998 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.4761366058 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.4829593782 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.6670785718 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.5654501085 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.7810071072 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.2334919219 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.488808171 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3534859166 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.5837022928 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.1673580411 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.4256978921 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.121266165 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.3348454203 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.2526168511 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.5000405768 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.1859093987 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.5327770983 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.2616894756 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5082668021 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.4039820936 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.5963334297 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0342743018 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.2106722499 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0168251159 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.2071693978 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.2225633848 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.4680658654 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.1649991958 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.4328117039 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.1049906446 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.3579727145 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.105968367 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.3884447474 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.3412962275 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.5789604921 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.3866780476 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.60404499 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.1740026405 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.4261065659 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.0865662826 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.3708353537 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.2110505791 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.4739587631 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.258885113 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.4885220189 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.1741677254 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.4469342589 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.1716570673 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.5608938423 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.1185701002 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.3493767594 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.0477092787 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.2820590617 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.1533886213 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.3781904602 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.0407683795 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.3408267624 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.2125826846 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.4735941044 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.2872908558 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.5675711887 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.2197709837 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.4784848367 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.3426239655 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.4822747548 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.2192585886 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.4527368673 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.1076519805 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.4493011434 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.1468838458 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.4417745561 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.1037111406 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.3845090606 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.1705614622 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.4283589307 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.0591946627 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.2932951073 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.1691888664 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.4411352116 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.1416550774 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.3528205749 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.144249324 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.4191931285 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.2803945229 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.3626852212 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0633143836 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.2651687739 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.020688049 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.2307402586 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.2269784465 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.4948809346 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.2820214504 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.5208833255 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.1601088672 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.4390231849 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.1901773558 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.4143082353 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.1257791635 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.3415200548 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.0274461195 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.3265811196 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.2611524911 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.4964180281 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.1563741006 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.4042462159 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.1387332093 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.4231622054 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.1326285083 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.3320956129 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.2497447823 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.5162841499 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.3139021174 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.6108991322 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.235010453 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.4387602841 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.0992435005 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.3897491958 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.216754292 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.4725649931 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.1116933816 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.3950140706 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.2226934995 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.4632319399 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.2791201051 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.5528589826 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.0909296921 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.2970179383 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.02807499 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.219484121 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.0253971349 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.1941399108 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0169773321 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.1905807428 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.1621367612 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.4038308668 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.0908092738 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.3143126503 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.3548435009 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.6046727327 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.3341282505 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.4931240563 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.1867871917 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.4539322586 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.2742131221 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.5129096175 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.2534618489 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.4757478619 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.4150555406 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.632650236 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.1876189148 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.454947207 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.3342150948 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.57049006 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.173300895 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.4267275321 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.2934481188 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.5432096638 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.0583944161 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.3018524463 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.0428604601 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.2723962004 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.1924194773 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.4198584709 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.0594147664 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.2791093079 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.1888886982 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.4271659434 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.0950029931 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.2956403655 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.0476351796 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.2242691735 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.0203405417 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.2850725298 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.0944388832 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.306261789 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.019432628 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.2483400713 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.2062990967 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.4818899065 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.2783417409 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.4975137727 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.1360125442 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.3674257568 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.0850175921 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.3898736967 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.2441010155 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.4832568053 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.2973906807 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.5569302178 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.203432711 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.4661592161 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.260138004 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.5610336232 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.1686868634 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.4170694333 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.1076094956 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.4166151764 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.3031235597 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.537458124 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.3112392832 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.5195600347 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.1315355302 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.3725651966 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.0631259366 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.3114796779 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.1544266067 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.4093583062 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.2921490956 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.4738053288 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.0376554837 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.220689676 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.0245374067 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.1093782195 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.1911760417 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.4884230434 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.2744588637 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.5473239267 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.2342560425 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.4694733574 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.3127145891 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.5389265653 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0368191769 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1231521557 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0009825493 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.0937946455 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.2338522454 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.4761317098 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.1016530484 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.3475324071 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.1532167967 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.4049784493 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.1825939096 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.4435726767 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.122224664 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.3790772862 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.1919159066 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.4399907204 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0652970567 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.2548173577 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.006043152 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.1774675407 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.1258893867 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.3740568466 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.1033449919 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.1661073339 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.1178497705 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.3273836873 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.0233770563 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.2536020887 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0573424824 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.2409922496 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0316163135 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.1841004492 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.1573790422 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.438817852 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.1346642263 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.2288901399 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.1734447333 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.4737881775 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.2855931625 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.3243323503 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.1225580295 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.3092331916 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.0262808074 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.2709457919 - }, - { - "model":"amazon\/nova-micro-v1", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.2525776792 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.486411661 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.1689851292 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.4129981246 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.2332134247 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.423589823 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.1556181424 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.3940970742 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.3019856242 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.5356092631 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.2493197472 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.3683119816 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.3299261084 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.5833446367 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.2557354135 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.4932559294 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.2978564835 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.5565909038 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.432503176 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.6128170494 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.2313315741 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.4980876242 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1602739474 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.4283745115 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.2208466475 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.4480506021 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.2446811676 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.4668847579 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.2365169024 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.4798564418 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.1836576399 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.4135133991 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.3426000326 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.5515631826 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.1661968791 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.36455729 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.2545049288 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.4810751728 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.1944100883 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.4665838917 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.2285219894 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.494917535 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.2797028839 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.4981195393 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.2674142888 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.5117383385 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.2226695839 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.4175069959 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.3456828806 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.5745055104 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.3168780466 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.5146817021 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.4102088719 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.5919699107 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.3905880035 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.6171544436 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.2819272083 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.5597768822 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.2855875443 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.5213025666 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.3459871993 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.6001384205 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.4616988853 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.6321620897 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.3819883103 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.6006708192 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.487017155 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.6782242157 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.3137128529 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.547364909 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.3942999119 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.5751984516 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.5715619385 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.715520007 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.7318691008 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.8563332446 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.3643234323 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.5843649416 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3881940897 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.6134636944 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.3166464353 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.5644297126 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.255024921 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.4730105151 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.3710429385 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.6023461066 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.3249072169 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.6015751183 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.376865653 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.6004276916 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.528425696 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.7187146132 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0388587129 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.2391023347 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0240769455 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.243334857 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.2822570677 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.5115986572 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.1968109087 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.4530952838 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.2122035826 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.4411039967 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.2092794988 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.4991915153 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.4228838058 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.640023433 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.4313107714 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.6419340536 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.2742764457 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.5234337918 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.1283588392 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.3942937124 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.3239027756 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.5801190353 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.3518811457 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.5955885461 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.3600825798 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.6054355131 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.3725744078 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.6643987333 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.2270079348 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.4461810563 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.1880347324 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.4366454082 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.2896828865 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.5142198212 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.2227918044 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.5195207754 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.3487353423 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.5751528871 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.4005498625 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.6354726766 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.2982872033 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.5388133219 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.3041789231 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.4600408983 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.3464358022 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.5558199575 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.2492331002 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.5673549318 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.243733181 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.5336823494 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.3319552288 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.5710759927 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.3526008915 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.5987314155 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.2114985992 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.4380696418 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.3225451009 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.5503588307 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.3450200815 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.5340229728 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.2513328863 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.509018423 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.3099962758 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.3679934022 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.133543561 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.3430238701 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.0554389677 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.3402266285 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.3820373565 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.5998907111 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.2728061501 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.4840324931 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.295109772 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.5448597381 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.247074884 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.4883086081 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.3323748277 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.5590337603 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.1910199162 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.5212112142 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.3430335831 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.5746721035 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.3481170694 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.5673214411 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.321620887 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.554989685 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.2947526867 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.4967353717 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.3837477301 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.6078820797 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.460911653 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.7361702362 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.3243318767 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.5325074594 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.29655196 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.5575622672 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.3727062795 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.6078730814 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.2761405776 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.5172440312 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.3073830454 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.5312334894 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.3738440243 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.6210548081 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.2080935284 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.4613292908 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.1142069107 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.4684888109 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.1248390462 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.3912118415 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0733481499 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.4043636025 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.3088186789 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.5586166118 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.2551093032 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.471197857 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.4788356583 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.6806459378 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.4407410774 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.6057762292 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.2858167001 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.5489970472 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.3573911021 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.5811687089 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.3641053048 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.588189418 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.5753469236 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.7440224371 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.3322929823 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.5688847284 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.5519246878 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.7223799311 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.2548042194 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.5077624586 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.4489469101 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.6629307467 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.241720034 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.490197442 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.1941190598 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.5032987767 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.3417012568 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.5625174346 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.3132737681 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.4785817971 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.2661551891 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.5088142958 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.2657051864 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.468771605 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.1950454148 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.4066164793 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.0895987522 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.406366105 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.2277966149 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.4790482859 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.1890846456 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.4927220926 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.3283448359 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.5971969841 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.4709197385 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.6367420245 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.273412678 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.5129343997 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.1877009474 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.5103267256 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.3590806785 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.5793899495 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.4522863769 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.69205378 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.3219627814 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.5598942303 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.4361318725 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.6665904527 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.2472520967 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.5258675516 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.3092184178 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.5908056148 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.4085013861 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.624313704 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.3558604021 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.5800045033 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.2686417951 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.5013057378 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.269846858 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.477780812 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.2834577064 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.5125776742 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.3998428237 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.5544961029 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.2654309389 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.4824817611 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.1862233406 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.2911678276 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.3108788704 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.5602956663 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.3518016236 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.5965516262 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.2801238065 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.5303841848 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.4297505232 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.6084569418 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.05869634 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.2062284007 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0559741426 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.2728382878 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.2899278495 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.5575393299 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.2752599873 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.476774558 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.2826575967 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.5327013244 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.3655163534 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.6164677172 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.3239526293 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.5501317095 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.395432451 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.6050844519 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.1463159063 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.3734663519 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0899876038 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.3152591585 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.2446481916 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.4981230837 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.1046640677 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.1573468803 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.2368457175 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.4726833185 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.0745513103 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.4280480618 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.1823100131 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.4063332022 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0722898622 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.2293754958 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.2476485874 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.4997416793 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.2660169184 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.2991680484 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.27341846 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.541063718 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.3233289278 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.3475215495 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.3008140825 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.5503351929 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.3025859718 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.5616917702 - }, - { - "model":"anthropic\/claude-3.5-sonnet", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.2714908932 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.4981839139 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.2421707351 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.4638593828 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.1552497705 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.3876789912 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.12404113 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.387786267 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.2654957101 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.5223894972 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.2492184328 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.3844495283 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.3358787565 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.5936351207 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.3077302936 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.5299097797 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.2889865542 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.5430128204 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.4267246672 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.6048255191 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.2209069897 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.4983774573 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.2238610606 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.4561674954 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.2218813556 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.4561380984 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.2970501456 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.5026533348 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.2349948321 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.4767507943 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.1835124052 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.3948207636 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.3743066609 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.5599845365 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.197638086 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.4275815242 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.2511263756 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.4884204513 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.185215113 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.4698145601 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.1867886283 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.48432494 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.2914923356 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.4904369651 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.2947535379 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.5165639924 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.2396652186 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.43949233 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.3082568982 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.5337051323 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.3355598159 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.5162264918 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.4122857574 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.6050858924 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.3836458269 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.6416210642 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.2735961462 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.5476293362 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.3182484892 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.5528408781 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.3514942306 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.6058081868 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.4019223656 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.592911966 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.3635164411 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.5991540113 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.5032720779 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.693325521 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.2964481503 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.5313927058 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.3878661569 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.5730182703 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.6109509541 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.7441302539 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.7189431005 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.858877842 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.3415212877 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.581216976 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3866651561 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.6216604607 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.2934935537 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.5521598139 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.2612039966 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.4687393359 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.3757506556 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.5844308819 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.3534386963 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.6302138792 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.3472418737 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5869792648 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.55907943 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.7254469966 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0579558321 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.2491854022 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0644952316 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.27092494 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.3154133447 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.5290294486 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.2126420937 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.4630333055 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.1639811937 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.3928914747 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.1953419416 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.478395209 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.4173637317 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.6215184775 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.4359736097 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.6378773265 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.2576586101 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.4955502153 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.1623467534 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.4282927035 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.3311462156 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.5917802475 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.3618326454 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.5905610326 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.30984493 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.5816152273 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.4256755459 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.6937537754 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.2392256846 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.50697791 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.2113990452 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.4634979196 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.2765007451 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.4939949219 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.2531559761 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.5360266274 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.3367683936 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.5620478968 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.3820994257 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.617255004 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.3020669513 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.5652474506 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.2526786297 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.4189814818 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.3764722943 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.596660778 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.2446930524 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.5495728981 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.226882832 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.5274827881 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.2928073284 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.5503591158 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.3359656431 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.5832978038 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.2052531723 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.423812318 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.2624484733 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.5071140689 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.3198638529 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.5401137308 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.2455263458 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.5060005291 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.26830985 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.3779300192 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0877409274 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.2860240482 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.0878946168 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.3792924127 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.3575309079 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.5809284684 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.3412387019 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.5581098509 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.2941552983 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.5344011771 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.2473518914 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.5038938769 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.2928160974 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.531553217 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.2381371552 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.5343730926 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.3831267967 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.5877718214 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.3166439754 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.5664991748 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.3248935831 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.556535727 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.2773274773 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.5134253387 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.3902392905 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.6255884221 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.4320552013 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.7162987249 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.2973625302 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.5304507919 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.2814844596 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.5325214597 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.3423342516 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.5714046498 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.3073801688 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.535796278 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.2915844659 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.5233082306 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.3496367393 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.6133721509 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.1393773943 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.3742917816 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.1302935996 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.4828092948 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.1183565284 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.3604387918 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0926735247 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.407269173 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.3454626797 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.5668266666 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.2472009189 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.455460052 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.4397356804 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.6588393086 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.4762161169 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.6165646404 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.2837156349 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.5580363271 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.3506539621 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.5826652331 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.3702288509 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.603198011 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.5348426193 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.7292524118 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.3025325263 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.5428889158 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.5423330128 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.7003733903 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.254709734 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.5378272729 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.4143683284 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.6253967915 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.1962212986 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.4367493461 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.1929905791 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.5044002449 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.2918138373 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.5417251668 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.3037847598 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.4992460758 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.2617759894 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.5276633986 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.2777846992 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.4645319126 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.1183361048 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.3272094202 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.079124328 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.3902046622 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.2117678963 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.4359443689 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.1732885789 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.4964169161 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.3255971416 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.6008485267 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.4485218422 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.6288500197 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.2558609607 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.4641225577 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.2212772916 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.5309758013 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.3158889064 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.5748466359 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.4289796381 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.6719817133 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.2686859348 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.5049847235 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.3662710201 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.6470689802 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.276310564 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.5265133216 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.3356818462 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.5995810459 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.4154607989 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.6309805607 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.3267961489 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.5560375728 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.2492030159 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.4785567235 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.2416391156 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.4777363194 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.2828625213 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.5389976055 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.379459074 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.5425233372 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.2043216296 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.4630588295 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.170099018 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.285966574 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.2999115931 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.5465058684 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.3389488489 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.6048347469 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.281242043 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.5334289579 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.429141026 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.5970789076 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0335871262 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1685656521 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0459514881 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.2949770962 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.2486274521 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.508189485 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.2880255112 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.4836035417 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.2152838338 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.4431377684 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.3439565173 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.614390118 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.3184530249 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.5591378012 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.3979752911 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.6309674882 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0582735609 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.3023435605 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.078527211 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.3202313873 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.2575010745 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.5010134287 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.1433567699 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.1989285088 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.1807854017 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.4444459462 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.097668655 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.4425301092 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.138613234 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.3633380586 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.1255527769 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.2943453041 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.2202422573 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.4809857806 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.2522298384 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.2981672562 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.2355051896 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.5276826547 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.3416160138 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.4052096434 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.3330382198 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.5839627022 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.3449975051 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.5825497566 - }, - { - "model":"anthropic\/claude-3.7-sonnet", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.2669835517 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.5240335993 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.2270338391 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.440172001 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.1929300991 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.4140331595 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.1217632337 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.4239570091 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.2668020262 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.5382720996 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.2942451115 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.3948537197 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.3183719205 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.5861114184 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.3080791098 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.5400496227 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.3007148198 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.56125031 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.4435216687 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.6147226174 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.1875598171 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.489336688 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1959881242 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.4184181558 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.226812714 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.4624634504 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.2455579306 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.4672104169 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.225211407 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.475337495 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.0987347036 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.3710433705 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.3664463476 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.5637794084 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.2102096564 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.4016844833 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.2623718084 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.503088988 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.1154914703 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.3988570155 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.1615117356 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.4748907807 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.2424817869 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.4495742511 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.2787657292 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.5218950872 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.2026807977 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.4168764189 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.3311551104 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.5619474693 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.3738906354 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.5465489645 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.4093718231 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.624497016 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.3928115555 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.6357826484 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.2618384456 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.5051359715 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.2370701457 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.5093826491 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.3614997929 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.6075438302 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.4250759164 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.6148435167 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.3322365647 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.5893045134 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.5389893408 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.7126873721 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.3388316195 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.559085934 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.3169438238 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.4981307307 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.4825264923 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.68082594 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.5992232007 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.8046234958 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.3430620741 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.5743349585 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3926109516 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.619328646 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.3210865047 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.5628291341 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.2322885818 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.4668783462 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.4168839668 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.6386977285 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.3546234926 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.6240074261 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.3292803051 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5747992621 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.5300268114 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.7191025853 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.10328558 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.2982675123 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0624940624 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.2487717813 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.3449460589 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.553592379 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.1978537012 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.4545082837 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.2192965513 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.4642021162 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.2765720113 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.5462639917 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.4144634414 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.6214654965 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.4534129099 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.6589590592 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.2772612689 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.5374508975 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.1221025047 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.3885482618 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.3393506808 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.6050917672 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.3513113523 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.6039205342 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.3412154588 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.5991114384 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.4209501123 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.6893145815 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.2551553778 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.5138522649 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.2850564276 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.5207891639 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.2687344116 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.5272540777 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.2521419676 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.5246932394 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.3022388162 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.5435384668 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.3764287035 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.6223142999 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.2645654805 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.5314828839 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.3107372447 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.4566077399 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.3598923473 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.5731300576 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.3220023978 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.5987896066 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.2249890784 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.5034200882 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.3391843994 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.5707828412 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.3651900786 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.6151464618 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.1894654695 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.4093754295 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.2918136532 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.5456800793 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.2975642517 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.5201018846 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.262347554 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.5086539396 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.324863236 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.3928185911 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0865732382 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.3303494458 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.0819614343 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.30926584 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.3814378567 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.5990132828 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.2914418132 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.5356815834 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.3426987765 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.5951472349 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.2551863067 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.5195593791 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.2538113904 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.5210775783 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.2197400214 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.5546741997 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.3912899265 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.6091996357 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.3473927547 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.5540996255 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.3219708707 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.5658325036 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.267663768 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.4820888027 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.3813299088 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.6248613325 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.5190281346 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.7387347937 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.2217546445 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.4614867807 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.2018629647 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.42603146 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.3638956194 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.6069685866 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.2818004563 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.520988987 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.2888744227 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.5305153826 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.3314709774 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.5984996024 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.2128497133 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.4631762575 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.170528296 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.5195265013 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.0865227031 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.3281312148 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.056144723 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.3812309298 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.3285408012 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.5645168724 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.2820925848 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.5101934539 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.2859396371 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.5523733153 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.3163285848 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.5051708575 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.2442291975 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.5262128573 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.2978567509 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.5693090483 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.3458915528 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.5893254106 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.4849210354 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.6981449573 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.317731832 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.5613754153 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.4713164151 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.6495822688 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.2503325982 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.5416976917 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.3948676748 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.6159422103 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.2295101341 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.4799966973 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.2510859963 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.5565619536 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.3515638071 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.5824484364 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.3241789047 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.4829492302 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.2746497811 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.5372002467 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.3076981818 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.4873474492 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.1299381077 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.3788726193 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.1912277179 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.4950017684 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.2097508574 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.445386701 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.1991170213 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.4930051732 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.2774555913 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.5562717021 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.4106937329 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.6058388421 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.2967826709 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.5262571298 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.2209836503 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.5082942096 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.3011367305 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.564663941 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.448345108 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.6666956471 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.3244718268 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.5658165542 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.407475727 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.6425140836 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.2859927313 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.543203281 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.3180064929 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.5878963723 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.457193264 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.6605869611 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.3695252842 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.5799625426 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.2009085121 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.4600287687 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.2198024006 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.4640276677 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.2907627934 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.5558044872 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.3933820255 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.5410635816 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.1783537399 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.4347699538 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.1382203867 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.2283013271 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.3344661609 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.5823607578 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.3519575693 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.5873812009 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.231149484 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.5147846224 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.4089741506 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.586691795 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0407386888 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1524708774 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0615467888 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.2411879984 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.2976308897 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.5789469354 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.3161395969 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.4958567702 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.2607779047 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.5026594084 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.3155204999 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.5892994562 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.2266398655 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.4844747718 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.3284536924 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.5738443768 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0897090973 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.3056008006 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.1036046651 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.3287187985 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.2179300592 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.5011175701 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.1863553829 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.2320124968 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.2455042454 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.4980044603 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.1581907622 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.4829223036 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.109344665 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.3572196014 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0982287816 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.2450420475 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.2396091802 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.5123401487 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.2229490278 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.2797129431 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.2637315312 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.550920529 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.3126940794 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.3809708703 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.2898457226 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.563083348 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.2463331565 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.5526475142 - }, - { - "model":"anthropic\/claude-sonnet-4", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.2057963854 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.4190801172 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.1996247285 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.4442043679 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.1217071927 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.328265152 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0291184089 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.3034981634 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.2026417516 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.463337874 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.1217867685 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.2662662886 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.2523784543 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.5195328715 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.2422451252 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.5264317244 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.2660357996 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.5489237487 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.3431691167 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.5659025601 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.1411356359 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.4480483081 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1518080432 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.3858829748 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.221843468 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.4559655934 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.1965127641 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.4448745325 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.2397069931 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.475288864 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.1580418587 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.3840415666 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.3229660121 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.5510776215 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.1731926352 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.3746553107 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.1557419708 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.4249908572 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.1551519146 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.4046874828 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.1827705659 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.4758300316 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.2887000518 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.4689651175 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.2524146198 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.4953606649 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.1826604742 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.3887388562 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.2999810338 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.5315905896 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.3568113924 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.5379558638 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.3821149754 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.5961975536 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.346540273 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.6141374461 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.1899525093 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.4989816408 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.1315130933 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.4341433104 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.3339653303 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.5865996685 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.4005566788 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.5928360984 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.3410327538 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.5702449417 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.4621824412 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.6644677733 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.3296804956 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.5432353476 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.3361581186 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.528779004 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.5530727537 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.7075859327 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.6368037655 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.8170495194 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.3558328881 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.5816613686 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3018683161 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.5862734644 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.2520796337 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.5331446204 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.1921744404 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.407787256 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.3243217549 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.5392052945 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.3295144102 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.5926958618 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.3482763208 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5982327963 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.452269216 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.6590312745 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.038115978 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.2335371377 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0230267925 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.2225585574 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.2919455567 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.5131981959 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.1292315656 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.4092770954 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.1854918728 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.3934600154 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.266974078 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.5393383261 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.3747394322 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.6079633657 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.4420465241 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.6372857982 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.2331017118 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.4736195736 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.1022265448 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.3883925189 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.2769514475 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.5440818488 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.3439976656 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.583941298 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.3120299438 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.5421647326 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.3139800299 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.6305869448 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.1948861013 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.4359300942 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.2500136994 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.4929182362 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.1414246135 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.400392952 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.1754521219 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.4706209345 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.2726271593 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.5135668055 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.3420319137 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.6095914494 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.2121435327 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.5165098934 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.3965805608 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.5197322727 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.2594796679 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.4780086047 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.3009966401 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.5761529867 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.2316697911 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.4912151365 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.2880260216 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.5039209227 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.2667770035 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.5371226098 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.1137141251 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.3542419226 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.2597105982 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.4929733942 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.1901313405 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.4793446685 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.2650641815 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.5151986111 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.2220814456 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.318063422 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0836411722 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.2984177736 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.043655724 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.2335929953 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.3229689104 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.5803873354 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.2069505913 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.489819774 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.2481653983 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.5271356217 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.182427997 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.4654775647 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.1686533634 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.3897581367 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.1950122982 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.4912322205 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.3261687004 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.5570892069 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.3239362726 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.5507453743 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.2536168105 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.5193737639 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.2743904396 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.4918283752 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.2986347511 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.5339411724 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.3783664491 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.6540376697 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.2199531275 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.4736214414 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.2571304866 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.5221658577 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.3141227729 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.545378522 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.2384329055 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.4946334042 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.2960130626 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.5398408986 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.2642890319 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.5934736192 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.1039040494 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.343251081 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.0893012181 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.42437101 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.0512543236 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.2613232237 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0260385015 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.3274101513 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.271766815 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.5041819226 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.1870998997 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.4329741844 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.4307107958 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.6547934264 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.3789232965 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.5490387026 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.2715804037 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.5307525116 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.3285527346 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.5692291394 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.3491027097 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.5771246685 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.3737440632 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.6547073078 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.2650680958 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.5205343441 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.4511284085 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.6282365697 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.2584319121 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.5177022275 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.371218137 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.5948906549 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.0926162696 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.3328045162 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.1301545031 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.4379044144 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.2619487807 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.4937679545 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.2097242522 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.4402768379 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.2206007581 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.4841625224 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.2418583032 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.4464397214 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.0858734443 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.3340485034 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.1646186552 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.4670539114 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.2147068836 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.4416005881 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.1631062755 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.4541385931 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.2705284479 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.5472822854 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.3949384152 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.5962190033 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.247144381 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.472335293 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.2147391336 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.5213195361 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.3112375203 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.5600711232 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.3915983131 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.6470079791 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.2583542451 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.5157482031 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.3458513734 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.634706105 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.2339300786 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.4691042738 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.2567323645 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.5361849509 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.3887890595 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.6100201392 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.3146346509 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.5476489231 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.1544230564 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.4169441821 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.203628947 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.4354012087 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.2554271374 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.5059580785 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.3635410685 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.5132811401 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.1818206483 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.3817278149 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.1031390295 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.2139568479 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.3173811815 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.5742755278 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.3658586977 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.5886644893 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.2739927286 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.5313085407 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.3396470191 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.56896944 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0391048872 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1804054377 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0422939527 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.2015864716 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.2324983634 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.497221173 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.2976387517 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.4761547661 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.2305488159 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.4723700911 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.2058479152 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.4902380763 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.3027505857 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.5506378818 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.3708866541 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.5846851624 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0772031909 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.238183844 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0504923015 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.3006315368 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.2388037754 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.5072972409 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.1136417481 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.1766903691 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.1425657247 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.3799312791 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.0602102371 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.4158042285 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0751924362 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.27475056 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0837679098 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.2484365945 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.143983328 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.3955437811 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.2002778421 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.2907084137 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.2377253988 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.5405656214 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.2660263348 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.3459946232 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.1095698729 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.379524938 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.2569801761 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.5416847655 - }, - { - "model":"deepseek\/deepseek-chat", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.2340706769 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.4849021224 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.2324871288 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.463163379 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.1145237929 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.321984884 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0457100188 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.345736773 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.2016755199 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.4502815524 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.1687061726 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.2849440478 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.2569252635 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.5329928091 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.2423805131 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.5144112629 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.2993048546 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.5651988199 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.3853695566 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.602672086 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.1414242697 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.4491969143 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1969875411 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.4112680984 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.1819096557 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.4374822654 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.2316387528 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.4676592617 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.2150708922 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.4723591307 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.1405266408 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.3821492664 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.3061008878 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.5403845189 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.1651572659 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.3834555839 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.1950569484 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.4560500844 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.1448904562 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.4007813245 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.1568183376 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.4733150063 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.2952652338 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.4756055948 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.2098690628 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.4737398201 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.1892048942 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.3849575805 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.2798537803 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.5394779979 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.3719179468 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.539682577 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.40271244 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.6036828752 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.3295443052 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.592814404 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.1908998381 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.4722273522 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.1077730104 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.4189694789 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.3296294187 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.5743852794 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.3965244172 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.6187707189 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.3207762021 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.5555389401 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.4227752207 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.6601015066 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.3139195907 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.5175917627 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.3178080544 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.5080472014 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.5548034204 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.7071106777 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.6432470265 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.8278285651 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.3223436235 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.5379000389 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3061941236 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.5918382188 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.3294693656 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.5673243159 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.1938427279 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.4526315895 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.3412226864 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.5602175563 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.2910894115 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.5672691361 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.3297556296 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5925809306 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.4619761505 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.6734036273 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0307272557 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.2077475007 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0256705679 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.2168141904 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.3055790363 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.5265498141 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.1365241949 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.4093204393 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.1778454432 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.4028796881 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.2463440773 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.5200026897 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.3412169553 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.598919602 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.3937102354 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.6229439454 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.2297889676 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.4704431893 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.1375213911 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.388908417 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.2789946732 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.5442737128 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.3066060037 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.556064896 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.3020610187 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.5462026627 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.299038365 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.625735911 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.1946429546 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.4570475303 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.2287931181 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.4943000447 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.233903322 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.4813311361 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.160985695 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.4538812051 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.254215081 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.5128620442 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.3318074211 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.5973973733 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.1875636541 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.5136106256 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.3333377273 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.4709407515 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.2789250445 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.52614288 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.2580648249 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.574708573 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.18097458 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.4665455335 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.1950643939 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.4676749835 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.3008270138 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.5432166189 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.0884771533 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.3265400527 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.2431929513 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.4397197217 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.1999599641 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.4781553813 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.2497463416 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.5083726446 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.2002123483 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.2845065116 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0855626682 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.2894501335 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.0163561936 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.2383002969 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.3116845131 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.5936722206 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.2244694024 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.491879277 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.3230054961 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.5636252799 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.2128915517 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.4613197046 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.2499065804 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.4673527976 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.1281964384 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.4588308902 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.319035437 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.555554753 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.2835968152 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.5094572017 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.2487969868 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.518708582 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.2238578938 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.4748109447 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.3058774517 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.5603224049 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.3684068806 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.6535736283 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.2253225205 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.4798221167 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.2383027705 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.504994716 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.3350990447 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.5554923615 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.2492753068 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.5052232921 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.2733774487 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.4886433877 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.2525535773 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.5869217143 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.1449724535 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.3965148993 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.0942041621 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.4155041047 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.0235872225 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.2475231508 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0365961569 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.3050512265 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.2767258101 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.514091898 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.1796236972 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.4422888692 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.3823229705 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.6453681393 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.3548387061 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.5492435889 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.2581682802 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.5198796684 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.2662027737 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.540420297 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.3091555064 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.5528775735 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.3860807525 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.6710753294 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.2477037529 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.5045143807 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.394880747 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.6072982987 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.2464304597 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.5343201712 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.3592456339 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.5816925415 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.1293187691 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.3738214096 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.1003361282 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.3952274191 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.2486377856 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.5019133104 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.2573787999 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.4669380076 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.257967718 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.5080229639 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.2025556713 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.3947833 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.0690009512 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.3268004816 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.1294343719 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.4480995236 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.1898460053 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.4405765457 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.1095645758 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.4118027966 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.2800732787 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.5502272532 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.4072726699 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.6058201233 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.2040949055 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.43974538 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.2090628208 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.5097240815 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.2774768567 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.5443247574 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.3840976738 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.6378529698 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.2438930348 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.5213646779 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.2864351463 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.6249321785 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.2312626914 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.4818505098 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.2916695233 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.5596704495 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.3464968589 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.5970733128 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.2948086539 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.5417485172 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.1765221595 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.4369131192 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.2151708901 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.4392843531 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.2555109482 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.4905742401 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.3470151937 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.512427307 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.1515985315 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.3742059137 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.1108390908 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.210084949 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.3319169877 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.5670040682 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.3882912951 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.6304381337 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.2755215402 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.5261420761 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.3549575463 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.5828055284 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0366134631 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1741100437 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0154547723 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.1399251318 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.2290327476 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.5229250115 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.3122409611 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.4921734247 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.2244630159 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.4814457852 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.2007945741 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.4705479648 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.2150236607 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.4970978512 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.3603191861 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.5938509481 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0717040801 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.2570725566 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0371460136 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.2645911946 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.2230132444 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.49266873 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.0804541385 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.1593791779 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.1516053677 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.3870787615 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.051561205 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.3871526823 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0693546179 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.2831628097 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.1008748312 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.2529276987 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.1997334357 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.4568839976 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.2322349452 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.3049134513 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.2350235637 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.528086246 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.2524231151 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.3456705882 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.1624728483 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.4496327865 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.2537223237 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.5463123746 - }, - { - "model":"deepseek\/deepseek-chat-v3-0324", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.2402975983 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.4194922076 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.1338075038 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.3511060104 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.1357614328 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.3566810684 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0294893106 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.2512539061 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.1512958639 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.3039734334 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.0801602615 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.1225273024 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.1445854242 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.3358973891 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.1013154049 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.3155908724 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.2238237549 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.4534366926 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.2689055687 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.4149416248 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.0975874673 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.3248757407 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1178477307 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.2954831248 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.159326316 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.382299198 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.1975279012 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.434347868 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.1616056325 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.333451919 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.0517558436 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.1667611675 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.1822663929 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.3603211978 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.0853061805 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.2591570919 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.156849916 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.3691380603 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.1100740183 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.3479696433 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.1450162321 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.4059490259 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.2328037895 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.3851910422 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.1735533986 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.393764966 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.1575864364 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.3025413929 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.2443635406 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.4676159664 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.2223182846 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.3685961254 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.3180611809 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.5065841887 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.2680634152 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.5259443653 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.1699873084 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.4049081719 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.1497155398 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.3184438517 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.2338536957 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.4766184042 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.2064068309 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.3746311154 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.2633055293 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.451378667 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.2576565152 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.3874723625 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.262811264 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.4775521011 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.1965649232 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.3326158945 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.3974650186 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.5373829936 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.5868227988 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.7574063883 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.2163623393 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.4705179867 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.2458237388 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.426538099 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.2533539434 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.4119435555 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.1546982368 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.461201833 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.2168672818 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.4298161123 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.2544899664 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.4510886635 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.3182982487 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5389072956 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.47314841 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.6884710951 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0355335694 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.1603046868 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0146310492 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.1847185113 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.2054043097 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.3989649156 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.0395886562 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.1860957619 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.1042634561 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.2700025792 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.2494923018 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.4977126554 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.2684001499 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.416230929 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.2827365983 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.5128198247 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.1483876396 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.3041127486 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.0867728202 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.2892236166 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.2451203581 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.4667806078 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.2680074322 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.4714945694 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.223787985 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.485186041 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.3571004344 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.6398491182 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.1560495384 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.3475080534 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.1427020575 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.2902772917 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.2002526169 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.4138800613 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.1417817824 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.3836219075 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.2225226541 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.4359559623 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.2708679556 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.4908463656 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.1711796281 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.4108641598 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.2194500975 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.3589526769 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.1994883012 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.3942042616 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.2570478693 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.5215463463 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.1546042692 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.4407575564 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.2471551193 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.4847447773 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.273955881 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.5078342939 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.05512322 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.1415530353 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.1622823381 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.3112906344 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.0777950259 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.286449259 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.2242267538 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.4739719705 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.2193704377 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.2898139055 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.1346054696 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.3409559995 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.0394802393 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.2573657649 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.1819054463 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.3169358876 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.104449722 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.3335661802 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.1790591986 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.3273464644 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.0709002184 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.2564650613 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.1088802366 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.3340713822 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.138140887 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.5001663831 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.2496973594 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.435485932 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.1379373956 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.2513871995 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.1828389227 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.3179756072 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.2129586558 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.4314516197 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.24401684 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.4592926922 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.3739586622 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.6802015628 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.1237069224 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.3000426144 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.0752750224 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.1972354123 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.2657383448 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.5050071583 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.1687285867 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.4021301132 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.2202972405 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.4092623804 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.3159175655 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.5588876314 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.092498489 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.2505889593 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.0281527677 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.1893859434 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.0257539048 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.2236454943 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0203648136 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.2172604464 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.1894457708 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.380925147 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.0698032229 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.2623376551 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.252814761 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.4131775231 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.2671823746 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.3966391033 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.2278356993 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.4590175615 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.2772231531 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.4560149918 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.2348823133 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.4500923911 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.4182790857 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.649967582 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.2343936577 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.4827310176 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.3388526407 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.5454578721 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.1839314203 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.4234191674 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.3489148579 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.5567945257 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.1402474958 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.3500619576 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.0777850092 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.3273785033 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.1949440941 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.4275372517 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.1720625024 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.387179761 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.2272550261 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.5004185979 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.1082324911 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.2676221295 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.122708093 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.3763985899 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.1795400131 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.4606246254 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.1736253216 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.3845743827 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.1212907088 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.3790107218 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.2260669876 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.4794686178 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.4126611726 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.5967801454 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.1563243249 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.3099234307 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.2041414382 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.5298340938 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.2570489843 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.4028888696 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.2324980283 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.4203934844 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.2041105012 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.3657796945 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.3130978532 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.5950962977 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.2525982324 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.4605822105 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.1739645144 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.3984988334 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.1200373123 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.3196364935 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.1814754432 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.3157059838 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.187751348 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.4405758845 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.2191046369 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.4677894227 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.1984127492 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.4104368787 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.2929382742 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.4500167319 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.1223581489 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.3679278604 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.0119991714 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.0656655661 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.264346972 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.4993975063 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.31801505 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.5317972494 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.2705929623 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.5134621473 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.256831195 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.4554987689 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.0897426047 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0174118264 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.1924672099 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.181272453 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.386776605 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.2603415771 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.4241396601 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.1963447008 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.4236533517 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.1559833307 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.3946264183 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.1705385375 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.3747437419 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.257036702 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.4598854693 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0825950269 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.2748258429 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.039067574 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.2124733373 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.192705772 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.444563462 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.078814153 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.1049283878 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.0644184223 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.2244706008 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.0232220251 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.2052740772 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0620084814 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.2964981916 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0414688547 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.1387297621 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.1352563368 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.3763213166 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.2591777223 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.3119832776 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.2137844239 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.5028557922 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.3129908127 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.3710290799 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.1943812143 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.4323832185 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.094824393 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.2713939288 - }, - { - "model":"deepseek\/deepseek-r1", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.2072083108 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.494326253 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.1112235198 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.3299787275 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.0771449577 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.3001544411 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.034106218 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.2505188758 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.1799007611 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.4327545103 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.1112135368 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.2588501418 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.1978004928 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.4602046776 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.1107790987 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.3358115304 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.2373672543 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.5260757727 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.3285274303 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.5590237808 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.1460355551 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.4052234374 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1252913378 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.3214539752 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.2406381299 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.4810229449 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.2249075936 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.4437585001 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.2160501071 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.4706714315 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.0795746221 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.3174359519 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.1884259335 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.3764386215 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.0405514883 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.171412569 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.1808592893 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.4337443828 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.1334340896 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.4279306348 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.1870340741 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.4752318502 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.1863630148 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.3895554099 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.1603232803 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.3829882205 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.1004543306 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.2090205571 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.251982914 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.5269588388 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.3081294684 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.5221618044 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.3414011031 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.5397513615 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.3050701984 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.5811833775 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.1987665104 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.4807089369 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.087795256 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.3788573069 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.3390983713 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.5924658961 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.3162103957 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.555772337 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.3226520344 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.5750653902 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.3833943767 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.5822540388 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.2910030635 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.5391676429 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.3135062284 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.503706011 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.5414890567 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.6901603131 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.65136344 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.814288256 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.2479212607 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.4998884286 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.2970650759 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.5373052889 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.2793086929 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.4720611769 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.1318325912 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.3864569881 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.3666373087 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.5839902989 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.2824386471 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.5388064333 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.3186845256 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5933794038 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.4349494723 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.6079740627 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0254287526 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.1905763319 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0134279826 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.139589465 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.1773927146 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.3630036378 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.0949811313 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.3340540429 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.1872829527 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.3848483899 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.2170056607 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.485021658 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.3564149867 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.5972656918 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.3029237977 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.5276781303 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.2011905527 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.4093497027 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.0276458775 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.1587376386 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.2716919376 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.5367224263 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.3302090182 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.5714655622 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.2876911945 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.5482159609 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.2940448188 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.5448923741 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.176124281 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.4444880058 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.1541945773 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.3924489747 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.188709393 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.4084479035 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.1138525523 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.3577532211 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.2897223986 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.5436301176 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.2571014471 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.4731076434 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.1827387853 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.4769620326 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.0613359658 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.2105103816 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.2802837747 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.4893845985 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.1179961209 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.3697436656 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.1795230257 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.4524836975 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.2168596976 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.4205029389 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.2464293328 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.4995807582 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.0931324834 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.3308191122 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.2346057729 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.4744017815 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.1793887241 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.4368728644 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.2249815138 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.5077874682 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.2125650621 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.2904132435 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.1190564309 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.3143397764 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.0335265947 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.1851037404 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.262210271 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.4965376896 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.0388474827 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.1762583779 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.2204139642 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.45446535 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.0629224316 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.2969811617 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.1410564792 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.4583101239 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.1751098097 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.4545492979 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.3249125796 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.5545635633 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.2625209874 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.5042437741 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.1520275352 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.3905524229 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.0999041852 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.3246573528 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.3358829505 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.5672747548 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.3376198793 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.6603531936 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.2178007242 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.46765621 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.2366406548 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.5016732556 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.267593114 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.4875681627 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.1669756152 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.397363268 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.2018833256 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.4087945979 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.2219340338 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.4586860241 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.1638168729 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.3734737626 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.065274449 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.2801240967 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.0385470126 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.298290272 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0422110832 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.2965714462 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.2703816733 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.5070503073 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.1229430143 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.3831192143 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.361635926 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.5409988692 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.3917357782 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.5329271965 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.26251395 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.5352694678 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.3264296122 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.5528035231 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.3390879053 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.5804013742 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.3506563803 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.5763378703 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.2742281839 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.5442712896 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.4398120524 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.6249092429 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.2246840158 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.5244113055 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.3594079605 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.5726823578 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.1637669376 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.4551543683 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.1390837831 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.3946845887 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.1351642547 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.419979284 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.1783822717 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.3420045131 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.2141579133 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.4737980192 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.1530398832 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.3776796544 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.1442994366 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.3828616588 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.1186331454 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.3757270357 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.174243664 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.4076535095 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.1409080164 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.4221024153 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.2784378348 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.5549743668 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.3456876809 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.5621225861 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.20948768 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.4445988225 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.1573888419 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.3946820302 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.3413304111 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.5865784347 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.2452155196 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.5046140378 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.2074916818 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.4529002152 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.3320288719 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.6201778863 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.2510503336 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.4902642703 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.2210031154 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.4920579152 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.3219720359 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.5542555367 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.2813682269 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.5156714123 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.2423449507 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.5073629744 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.2856584071 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.4617854306 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.1265658126 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.3696106678 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.053891041 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.1488881792 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.2862677011 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.5325160504 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.3443407299 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.6019992774 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.2507417591 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.5348282824 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.3383254859 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.5585953363 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.2049672714 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.4409541903 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.291136983 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.4722890493 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.2144441644 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.4937213115 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.2635775721 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.5351905044 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.290841669 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.5420316418 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.4456194272 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.6359307779 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0578608568 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.2953456626 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0207182972 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.1887439492 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.2592635841 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.5029886217 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.0439234831 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.1458942547 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.1079708762 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.3233208549 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.0349481063 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.3674901842 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0592346231 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.2924121066 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0728848714 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.2476630291 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.2131332995 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.4719991036 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.117107351 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.2063865989 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.2199013913 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.4962775874 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.1813558493 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.3079482476 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.1703974411 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.3893360635 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.2345318252 - }, - { - "model":"deepseek\/deepseek-r1-0528", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.453746534 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.311563429 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.5377881998 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.2396682484 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.4396755163 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.2166814512 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.4841980873 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.1410606918 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.4325882329 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.278130538 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.5385292979 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.2384630665 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.3608782934 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.3417210123 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.5899435952 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.2808352256 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.5279437548 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.3412745465 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.5826925715 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.390058702 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.597901367 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.2090093301 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.4788545798 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.2182033882 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.4418555529 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.2884896124 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.4868568502 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.2753136771 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.4912165901 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.2570929124 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.4913361477 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.2251696435 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.4568625878 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.3669654657 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.6010168651 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.2158729847 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.4281994918 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.2496588936 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.4813242802 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.1805770029 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.4382698967 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.2061377509 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.5025518062 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.2617461401 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.4657078066 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.2478040527 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.5055046978 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.2299760176 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.4382930134 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.2988263355 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.5680625724 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.3895386095 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.5693830083 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.3954134409 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.6048442845 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.4087786463 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.6570148202 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.3123005535 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.5830505467 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.335884194 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.5760217609 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.3897427607 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.6211463161 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.3989536756 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.5939433432 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.4245142301 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.6390046108 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.4937713215 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.6856946146 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.3112735556 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.5352584968 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.3040377019 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.513213381 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.5700014681 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.7211557733 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.6711231356 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.8279616884 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.3344072959 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.5751886204 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3905772718 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.6166676981 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.3438075851 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.5772164708 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.2647913841 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.489934723 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.4465199237 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.6601536062 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.3429301939 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.6176510545 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.3411643102 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5823324237 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.549646027 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.7334440232 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.020521917 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.2363991566 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.038851639 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.2330289804 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.3431336081 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.5478126679 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.1775808906 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.4530128509 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.2923469792 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.5018131258 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.2792533669 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.5455037681 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.4434404142 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.6607832834 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.4308263411 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.6353494719 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.3988769415 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.6179037667 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.1507190186 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.4178703457 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.3268473222 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.5875784187 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.3853964262 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.644268597 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.3626330278 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.5869542517 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.4368014195 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.6964617832 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.2899610129 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.5189509889 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.2768774018 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.5137444144 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.3472094652 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.5708345321 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.2724432186 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.5503510798 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.3422169639 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.5695643744 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.3590717293 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.5878176707 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.3331002174 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.579659503 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.336969824 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.4913494739 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.3700621486 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.5837249923 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.3088767184 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.5884562937 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.2509895727 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.5504416013 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.3061931512 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.5755019454 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.3918191618 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.6170794637 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.1880645416 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.3989811117 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.3364506705 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.568444139 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.3756915681 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.5684194735 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.2805488398 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.5298229011 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.2739358937 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.3591923755 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.1644396626 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.3700254848 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.1334761711 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.430142277 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.3953125797 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.6309277281 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.3149005177 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.5121389124 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.3477763054 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.5972164517 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.2678562502 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.5016256548 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.2950679588 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.5434984641 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.2063059259 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.5248165256 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.3735487132 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.5985929462 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.2991127987 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.5530820193 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.3338185652 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.5787491818 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.2806196555 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.4516145469 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.4099406931 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.6343459464 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.4650961929 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.742377276 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.3081644584 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.567441399 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.2529515223 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.5121166935 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.4035620418 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.6298286173 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.2518010194 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.5019977224 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.2807966919 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.5304954689 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.4120530736 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.6593260342 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.2237313135 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.4999468628 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.2341852741 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.5553049856 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.1546333274 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.3730093916 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0699361494 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.4093787348 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.3421886958 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.5848387431 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.2784394077 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.5108902329 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.4372599799 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.658993109 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.4174871385 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.5931595705 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.2722413511 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.5398004754 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.3757453667 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.579529149 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.3198419121 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.578067895 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.5056809967 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.6966172136 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.3154573291 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.5600439488 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.5650725553 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.7127688163 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.2839962776 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.5461994333 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.4589390255 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.6469214364 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.2696305636 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.5244991633 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.3319598047 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.5861214096 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.3972794455 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.6232130593 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.3415088181 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.5390152372 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.2864590726 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.5427330367 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.3306843079 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.4979151965 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.1795249187 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.439139386 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.2284701542 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.5463949389 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.284867651 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.5244275819 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.2202968782 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.5048544071 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.2893867971 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.5578776437 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.4400648152 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.6115459213 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.32232697 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.5111738773 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.2383157401 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.5175978358 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.3617201239 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.5926536872 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.4685613196 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.6911608485 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.4043048116 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.5944297519 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.499303533 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.7212747243 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.2819090872 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.5410426012 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.2951438389 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.5712604905 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.4243816819 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.6336752528 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.3644641049 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.5789291672 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.2251503741 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.4903696181 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.2873179741 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.5146227404 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.3047162219 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.5499065244 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.4136384398 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.5513959132 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.2867986153 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.4858296113 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.101641187 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.2278398804 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.2782856704 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.5422402953 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.419062749 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.6455051222 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.3049575256 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.5755673238 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.4056417811 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.6001236102 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0390495324 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.2184695701 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0636422012 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.296360163 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.31244285 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.5630479496 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.2911146353 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.4826415387 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.2813736334 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.5350839669 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.3532409319 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.6242374823 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.3436662566 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.59026429 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.447007323 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.6624838094 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0835563722 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.3389944349 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.1150505644 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.3347031666 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.3135484165 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.5406963315 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.1901478921 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.2245996516 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.2533217863 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.4979177365 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.1440335108 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.4965120865 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.1607554286 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.4105347932 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.1645681144 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.349083777 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.2392191989 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.4953232912 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.2147687469 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.2691081013 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.3075560511 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.5721434339 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.3628421643 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.3921364269 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.3475884805 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.6151377801 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.3045492612 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.5663641807 - }, - { - "model":"google\/gemini-2.0-flash-001", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.2499368982 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.4866856816 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.2032851597 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.4298526595 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.1425831777 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.417812484 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0942003912 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.3936431694 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.2720072911 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.5061425801 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.1852281481 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.3131355766 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.2661119281 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.5602064313 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.194000916 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.4535351144 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.2723646085 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.523730035 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.3878873306 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.592059186 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.1616972069 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.4328883781 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1751707508 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.4233167915 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.2181300738 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.4481685644 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.215871503 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.454646328 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.2489741176 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.4653663899 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.1897550206 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.4113322824 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.2828158417 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.5179193605 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.1666719193 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.4162476616 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.2101918607 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.4594084738 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.171792145 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.4130816559 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.2000799381 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.4988072587 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.3128559705 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.4876970107 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.2526226356 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.5007947756 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.2513374937 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.4492954272 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.2672489266 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.4952818431 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.3639249804 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.5538942616 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.4195422704 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.6085776274 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.3993771419 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.6357624414 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.260213991 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.4864597898 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.2793994268 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.508602676 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.3077322035 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.5442587621 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.3445618242 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.56756333 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.3382313883 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.5705047774 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.5354923841 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.7125417889 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.3210907235 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.5292167329 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.3443988404 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.5424093748 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.4617498931 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.6321211549 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.5951204739 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.7613397345 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.3547349164 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.5728186386 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3576086442 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.6061019948 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.2676796828 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.5663114288 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.2591969049 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.4814104167 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.4277064952 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.6325003325 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.3269491361 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.612989238 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.2978444639 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5625976718 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.5244850062 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.7081676298 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0361796916 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.195120399 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0308475815 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.1890051706 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.3150102324 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.5340625085 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.2069349026 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.4761478869 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.218966139 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.4181042336 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.2395753169 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.5013466224 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.4286739426 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.6359453866 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.4344801023 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.6474630132 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.3063272533 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.5583492368 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.1757166109 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.4152865266 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.274174318 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.5398050773 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.2927577916 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.56675535 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.3126497326 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.5733116043 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.3882402101 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.6624914478 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.1503374245 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.387578878 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.2586477386 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.4926307711 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.232232342 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.4479000894 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.2119661542 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.5179791668 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.3116016155 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.5537049588 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.3440495149 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.6004337743 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.289402493 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.5452343766 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.1879084772 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.3787664659 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.3561823456 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.5619277442 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.2609272138 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.5344454302 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.2053088231 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.491430616 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.4063809808 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.6167229896 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.3195578223 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.5643490342 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.2319063764 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.4461032467 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.2485030856 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.4688750005 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.3577614115 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.5550425425 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.2106519512 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.4804338475 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.2003821738 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.2850409306 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.1057085266 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.2715159938 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.0708970987 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.3583242702 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.3007758183 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.5378616534 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.2795823496 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.510712197 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.212326315 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.4808320621 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.2353530898 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.498918765 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.2180528106 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.4350629264 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.1752962142 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.4941543502 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.3848912948 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.6007084641 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.3023908744 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.5415851472 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.2906692433 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.5058194876 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.2450137243 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.4736891146 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.3362256422 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.5937301471 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.4506461939 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.7014575648 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.2814491551 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.5513455988 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.2724659132 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.5135188138 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.3218775268 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.5549355351 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.2676260775 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.5006126727 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.2807256089 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.5254117929 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.321808319 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.5984422632 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.1741946793 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.3946218629 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.1729000095 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.501423047 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.1059783758 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.3306681685 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.044447102 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.379164388 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.3044074075 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.5411406602 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.2312884216 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.5063263205 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.3792889323 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.6001893022 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.4267930078 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.583867208 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.2910351794 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.5387220968 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.352446244 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.5847615168 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.3275326122 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.5672682921 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.4390630519 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.6588223005 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.3171625415 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.5772460535 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.4778809926 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.6619427768 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.266439807 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.5101737799 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.4085478092 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.5658655698 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.15500158 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.3976950814 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.2871474268 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.5626056331 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.3431093451 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.5329383828 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.2838954554 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.48412 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.28322243 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.5207906875 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.2943621784 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.4749986301 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.1334599567 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.3857630332 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.1122823827 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.4635739463 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.2156343628 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.4198074415 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.2207115321 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.5118672399 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.2933759432 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.5710698033 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.4425485023 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.6215234533 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.2152161054 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.3971586695 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.2374945192 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.5272097328 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.3335095456 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.5683382887 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.4033731642 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.6519271741 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.2763965367 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.520411275 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.4350912598 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.6853284539 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.2415112532 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.4752276392 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.2730244696 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.5683000198 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.3528600199 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.5850279702 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.3231474889 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.5470305035 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.208524293 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.4271785057 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.2278942993 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.4700484002 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.2681599618 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.5133569475 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.3973895019 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.564303282 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.1768961884 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.348072496 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.1310924895 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.2594660098 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.2806311806 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.5431910468 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.3916346072 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.6289997941 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.3007101794 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.5519617104 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.319789332 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.540431455 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.025133561 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1190888288 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.018289241 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.1529779827 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.2859928961 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.5295533045 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.3010091175 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.493353195 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.2407351505 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.4734226532 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.3363563156 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.5891323556 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.3170290484 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.5491010642 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.4032471641 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.6394566771 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0638721925 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.2417596357 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0586926041 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.2518105026 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.2746262088 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.4924092277 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.1648044562 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.2037009528 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.2251844915 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.4461128395 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.134895078 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.4836355083 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.1141869405 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.3128047258 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.1341626528 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.2926500144 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.1826865099 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.4272648905 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.1499743312 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.2347308661 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.2557280993 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.5212143675 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.2610248692 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.314329989 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.2674818373 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.5139550602 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.2723879605 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.567169258 - }, - { - "model":"google\/gemini-2.0-flash-lite-001", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.342116281 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.559017125 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.2612038772 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.4610162591 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.2362598693 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.4567970323 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.1787109448 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.4308196228 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.2883756135 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.5452929372 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.2563045907 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.3587997566 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.3529391424 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.6061230642 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.3735935027 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.6118752881 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.321926202 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.5672345783 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.3971969927 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.5997335085 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.178647434 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.459184816 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1766325657 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.42829263 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.2689181562 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.5103302194 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.2712387895 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.4721454199 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.302725237 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.542445303 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.2074435657 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.4358785934 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.3306584572 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.5470737398 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.1839015438 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.3995221223 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.2402619776 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.4816842061 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.2017479595 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.4438028104 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.1964434077 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.4658109118 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.3578761246 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.5250650323 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.3118354834 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.5301057957 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.2712094702 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.4554148161 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.3106246769 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.5720599098 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.3532786899 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.5492979392 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.4167458111 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.6414773714 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.4574369641 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.6642298649 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.3171272191 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.5378911972 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.3576320675 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.5996552124 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.3763167038 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.6069984198 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.412011399 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.5948875971 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.4097540667 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.627572506 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.5159030608 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.6896498523 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.3297475202 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.5587828835 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.3865296224 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.5630460332 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.3392082462 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.572145108 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3956672126 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.6257285571 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.3764428485 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.5794911256 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.3019679958 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.5294391762 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.3967317865 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.6195138455 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.3512547173 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.6404359092 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.3566291662 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5901709379 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.5810870953 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.7402195597 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0654195918 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.2431272498 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0469027058 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.2599881115 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.3528703899 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.5605109395 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.180119873 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.4654772276 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.2852268084 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.4820918601 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.3191497109 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.5714009602 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.0284513224 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.1002460472 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.139929051 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.2620347708 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.336133928 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.545638091 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.1683321583 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.4261207547 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.3151646581 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.5599039863 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.4642022823 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.6720003623 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.3580586993 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.5973680765 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.466208483 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.7131031141 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.2779082008 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.5108078595 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.2835120188 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.5067796001 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.2679844764 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.5037933164 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.243508886 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.5329216971 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.3211161163 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.5700208797 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.3654943432 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.5949572053 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.3652971414 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.605823173 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.3680693686 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.4700002965 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.3627331006 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.5928422481 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.3781416178 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.6275555619 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.2553025069 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.5408614418 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.3062994849 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.58504635 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.3700483899 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.6006134318 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.2315452529 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.4617416997 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.2885658633 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.5512502051 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.3602561303 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.5568929694 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.2490831768 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.5186737173 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.2780296298 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.3546301665 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0591613845 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.2662007935 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.0779434955 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.3095367271 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.3908377774 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.6260985434 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.3130016025 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.5391710538 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.3471192 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.5771683143 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.2207082106 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.4870892013 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.2993063676 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.5474937127 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.2153101678 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.5379094165 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.3532448793 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.5657281022 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.2774888867 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.5389145892 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.3233458031 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.5703875576 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.2274584553 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.4665758709 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.3746934831 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.6046609636 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.4734156929 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.7284733826 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.3552781219 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.5977013775 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.3211140622 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.544805929 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.3774439938 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.599804205 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.2861323795 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.5180107937 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.312116976 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.5463170004 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.3780530389 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.6232733213 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.2177968416 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.4415294523 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.1367315108 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.4585024296 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.1871006972 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.4365658925 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.106229994 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.4277182017 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.3701266209 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.6213278685 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.3031726243 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.5214945108 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.4366998679 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.657751239 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.4527636476 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.6078708965 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.3170527901 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.5593259189 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.381029585 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.5993345379 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.3912183043 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.5915454866 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.4626670594 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.6755172019 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.3285564053 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.5835489949 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.5863602394 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.7345305045 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.2800732142 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.5572474453 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.4692280866 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.6613842883 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.2768738298 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.5136190092 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.2743310586 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.5596031593 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.3271104301 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.5357780664 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.2320778637 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.4100893183 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.2632131459 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.4981476408 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.3150668549 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.466344362 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.1425864886 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.3921687091 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.1876717865 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.4923376927 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.2921978364 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.5121729513 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.2437506181 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.511793128 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.3070769379 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.5829431146 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.4562210568 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.6349957477 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.2923338131 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.4917795718 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.2448808161 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.5213243396 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.340913979 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.5878242881 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.4428192719 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.6769035337 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.3616286251 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.5800788406 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.4820281618 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.7239645292 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.2700100505 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.5372825559 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.30759425 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.5725597295 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.4465523529 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.6649928543 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.4129762096 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.6159040363 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.3236889282 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.562821135 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.4114706745 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.560767027 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.2476296934 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.5033069835 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.1179117378 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.2587205011 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.3291780472 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.5900439285 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.4285247051 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.6508035663 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.3407035036 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.583433778 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.4167194618 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.6135073244 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.3449466128 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.5552120384 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.3046252906 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.493749829 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.2583582755 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.483302551 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.3579243963 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.6083657804 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.3536805169 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.589790723 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.4655851302 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.6703129046 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0932068478 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.3792197219 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.1171143464 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.3384795969 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.2793739299 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.5145166794 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.1321430026 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.176594989 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.1979226992 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.4588070152 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.1181506898 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.4718021868 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.1110331374 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.3622261661 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.1764819134 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.3463753843 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.2753706711 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.5198869679 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.2207873686 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.2915581098 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.3429319501 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.5895603725 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.2713803282 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.5773152323 - }, - { - "model":"google\/gemini-2.5-flash", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.0532606841 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.1073712755 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0366084106 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.0918534276 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.0152635235 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.0888307029 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.2298187784 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.0558323892 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.0713257426 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.1072693099 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.1463494979 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.2497593431 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.0094322191 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.1037916124 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.0001965409 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.0267044753 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.1125538275 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.0146450668 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.0694240797 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.0347588238 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.1419212765 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.0582570743 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.1132092265 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.0098853623 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.0165633766 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.0288866262 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.0765030508 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.1414592386 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.2511168982 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.050165656 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.059692947 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.1732325986 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.320908965 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.1653880539 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.2211462144 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.1467739974 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.1754458302 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.0507024887 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.1030268479 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.1697494021 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.2799880729 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.2781125894 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.4200022747 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.1216206159 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.2069979707 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.120693374 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.1968497699 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.1189162738 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.1846074997 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.1307394464 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.2486210965 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.3167547043 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.3513933765 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.5455939196 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.5713088936 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.0379503072 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.1087301231 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.1171367611 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.2647620406 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.0584383584 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.1412915198 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.0396603748 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.0985321352 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.0985964312 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.156061678 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.0702762868 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.1586154477 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.2019995088 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.3236372397 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.3648977534 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.4646316658 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.013121921 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.1524824027 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.2518370758 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.0406782903 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.0604419015 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.1183279848 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.1920673939 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.1077307738 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.1927713334 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.2445177715 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.3496977746 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.1662382153 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.1808826046 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.1043372044 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.1458478186 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.021727044 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.0580949052 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.0418300745 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.0806441203 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.1378617741 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.2350595049 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.0504154457 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.1355433195 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.2636104799 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.0732503288 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.1225115139 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.0583064468 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.1449618078 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.1541511534 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.2680259178 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.1272530801 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.2851760515 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.0473708874 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.1079640005 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.0283865781 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.0449505586 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.1349286875 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.2409523809 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.0677858867 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.1661259662 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.1074769757 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.1755162217 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.1024243094 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.1478558086 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.1405067201 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.2106207596 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.0416456555 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.0990623031 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.0795987945 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.1674316707 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.0674985081 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.1154928046 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.0672103499 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.1538606955 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.0731686523 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.0985812466 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.2208894183 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.3047935907 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.006336512 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.0186059462 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.094505426 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.2017400541 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.0141271464 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.0334534153 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.0867381827 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.1588310511 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.0055901791 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.0899799957 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.1294816588 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.2140376737 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.1096418767 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.1484602611 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.2123460731 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.355254469 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.0244791954 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.137921948 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.1020473557 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.197831409 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.1388655603 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.3318625881 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.1641407036 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.266680691 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.0091866723 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.0286785733 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.1394131915 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.2765922512 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.0483965296 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.1060232209 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.10034493 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.1553807871 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.2797145372 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.4388238124 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.034290559 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.0746115811 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.0446825714 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.2751100361 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.035737951 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.1320906069 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0471165976 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.1649158656 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.1717883762 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.2829924006 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.0473366133 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.0472659921 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.3123594633 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.4403827935 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.1719687661 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.1882752522 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.0749300244 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.1200920019 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.1830386377 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.2502216632 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.1361030105 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.2437066717 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.1732366992 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.2953051043 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.1191095692 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.2528032318 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.1851411977 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.2745749365 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.0328251853 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.0948529778 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.2486142719 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.3250953964 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.1260552814 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.167920313 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.1569836743 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.2025766659 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.0375021678 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.0518133834 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.045375844 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.0834549749 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.1243674491 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.2280537353 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.0291939407 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.1082649083 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.0370853459 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.1174899825 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.1757682146 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.3008766306 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.2082330564 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.2618628182 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.1395902324 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.2174681725 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.119440034 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.2479022501 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.1807911166 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.3244473544 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.1337368217 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.2173463535 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.0167265048 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.0601986184 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.2110013881 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.3419482007 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.1301054745 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.2254627309 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.1643043557 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.2361420263 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.1225039269 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.198958675 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.0931461339 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.1419037126 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.0329651295 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.0924755182 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.0726933467 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.0966899881 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.0627836379 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.1537747644 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.2746649389 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.4031644529 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.1060274283 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.1886969167 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.044287435 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.1034692205 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.2528660307 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.319204417 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.1044041173 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.166665052 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.0152843146 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.1294358837 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.2479602917 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.2169787191 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.3030152567 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.2322985671 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.3328943549 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.0600208887 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.1457445652 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.0006881025 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0093430764 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.0639334201 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0290365467 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.0799597164 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.0058978605 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.0483361134 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.1308987845 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.1738613828 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.1669728523 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.1905927635 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.0414601372 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.0662245232 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.0085456627 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.0621750153 - }, - { - "model":"google\/gemini-2.5-pro", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.2073802913 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.4889223975 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.0840656979 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.3453561943 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.120094546 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.3259782194 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0974181135 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.3477814679 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.2393172056 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.4971254293 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.2089212841 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.3406916002 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.2712045148 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.5477096036 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.178052271 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.468064885 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.2747843596 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.5519960681 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.2863967069 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.5318173199 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.1096694862 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.4291604898 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1630720543 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.3952400339 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.1892846534 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.4212342522 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.1938470016 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.4527968539 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.2094379574 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.4509809217 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.1931386564 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.4233010233 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.2957522582 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.5232039352 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.2308361669 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.4087255612 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.200456445 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.4226152307 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.1414132922 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.4170843853 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.18522743 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.4467570037 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.2590661095 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.4657468506 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.2663307677 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.519985227 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.1913577407 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.4064669591 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.271237739 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.5173954387 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.3067537945 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.5194482945 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.3694979709 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.59081536 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.374702944 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.6019503341 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.2792699678 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.5157552806 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.2334415639 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.5128705295 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.3536861453 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.6024608455 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.4031829559 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.6234553711 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.3189602129 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.5548503533 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.5229096392 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.7023434262 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.3137252517 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.5147981205 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.3302929673 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.505425141 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.3889146477 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.628092835 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.4660772497 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.7280386297 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.3593767686 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.5668073679 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3662275621 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.6102640711 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.286051969 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.5373856549 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.1816947237 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.3981159206 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.3579818144 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.5889481625 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.3403832088 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.590264879 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.3180384008 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5571267732 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.5778354146 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.7490356238 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0380719948 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.2066039108 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0259757351 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.147148937 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.34811918 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.5482709715 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.1618983325 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.4411905252 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.1925315551 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.4032389241 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.1992720083 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.4905441802 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.3722751955 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.6207213131 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.3378499277 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.554090013 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.2879989689 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.5083598943 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.1719225434 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.3992950999 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.3234067809 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.5706707095 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.3187264685 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.5856828402 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.3583744222 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.5732194975 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.3364664006 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.6438910651 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.225000401 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.4567066441 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.1941055199 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.4504811493 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.1944477164 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.4517028309 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.2035517344 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.489419705 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.3074361781 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.5178180754 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.3505959215 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.5955060476 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.2581140706 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.5395853617 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.3298839393 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.4471547552 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.3177915441 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.5387853038 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.2549228547 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.5322440265 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.1983700044 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.4843458319 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.2920207746 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.5345155349 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.3702042307 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.5776853975 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.1498433716 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.3652702605 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.2858443353 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.5499221943 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.2481102245 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.4829685786 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.2305492704 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.4826740501 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.1746024172 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.3073554703 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0913600379 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.3305636235 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.0269728382 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.3155017027 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.3751831337 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.6085851316 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.259988405 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.5046714005 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.306099972 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.5370842801 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.1736022871 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.4305653856 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.2826629018 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.5215979873 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.238462643 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.5334745774 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.3161992509 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.5479755911 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.2133071404 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.4660281027 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.2370074805 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.4804215458 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.2399769139 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.4726429935 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.3104483533 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.5705763492 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.4229626959 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.6856510383 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.2227645269 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.4888582617 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.1869632744 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.4322398057 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.3115387303 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.5342290246 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.22081567 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.4878836055 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.3125704924 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.5397676594 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.3586968371 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.6075205554 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.1901221224 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.438728736 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.1330024304 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.4711022084 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.0258426139 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.2237241232 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0460531144 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.3418147419 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.3714452662 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.5977153904 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.2234825764 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.4562477173 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.4180718844 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.6426219278 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.3938693136 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.5573992167 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.2456102401 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.5190609119 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.4265619216 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.6320824157 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.3537745123 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.5904429929 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.4588664196 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.6844540285 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.3139442337 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.5741447282 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.5016049999 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.6788048008 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.2449777422 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.5268764903 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.3789708434 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.5790333031 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.1839360587 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.448997409 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.1891835724 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.508623725 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.3352727297 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.5583215205 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.2009000601 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.4004383195 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.2297304995 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.5040607132 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.2136543311 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.3916393466 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.1221415503 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.3516954503 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.108688779 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.4515663403 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.2308889646 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.4618048204 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.1667302795 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.4717296026 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.2351861569 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.5329036218 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.4168384094 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.6032787874 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.2291561983 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.4673987803 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.2036733766 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.5047620958 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.3500384253 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.5797456052 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.4268868445 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.664863412 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.3005035588 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.5210660172 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.3401968092 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.6217197146 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.2662307086 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.5053585639 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.3044345778 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.5714036731 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.3330093484 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.5694168709 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.257812168 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.5167002436 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.2176170344 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.4496406258 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.2605536967 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.4816066849 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.2992360169 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.5585599708 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.4269079012 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.5719718715 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.2042995208 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.416626147 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.0833250166 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.1982489294 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.2832304201 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.5467240003 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.3273464288 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.5827048506 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.2813742416 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.5413704266 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.4063054094 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.6020718231 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0495917134 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1711087397 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0358872001 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.2761667256 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.322448107 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.5887654616 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.2148139783 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.4226865444 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.2829644119 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.5194956482 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.2911955464 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.5560139888 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.2745000434 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.5206422805 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.4261790941 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.6358462464 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0696458062 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.2461140434 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0728989985 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.2267265908 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.2072386748 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.4628288648 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.1471425714 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.1971299212 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.253783308 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.4487387303 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.0662544821 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.3784904721 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0887390501 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.3201148841 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.1178050815 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.307433063 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.2253512269 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.4949150094 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.1897306863 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.2561574259 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.2201641871 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.5051068628 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.2508351517 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.3110461024 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.252616884 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.4822778382 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.1940901676 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.4661416094 - }, - { - "model":"google\/gemini-flash-1.5", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.2041309024 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.4630820951 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.1453469275 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.3874336138 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.1059711376 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.300568481 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0110669593 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.2141540563 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.1906134629 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.4467868389 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.2001643223 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.3225170104 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.1876459632 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.4830875841 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.1830944017 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.4175337587 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.260683336 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.5383651277 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.2962406565 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.5190026627 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.1438491224 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.418499848 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1044262978 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.3577242047 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.1675595946 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.4187188467 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.1545869288 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.4031218248 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.1892328534 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.4434206925 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.1070430926 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.3318636339 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.3180630074 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.5401606876 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.1993490206 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.4215901923 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.1604267099 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.4139767864 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.1651025864 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.3978212407 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.19213953 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.4512512424 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.2406657525 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.4536513075 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.2519150677 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.4966963131 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.2057435019 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.4016427491 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.2183929994 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.4877941086 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.2700916391 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.5150577414 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.2900668497 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.5379961095 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.3193377157 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.5978978692 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.2457083208 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.4937183307 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.1424911854 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.3546559531 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.2481120403 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.5339550423 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.3131426524 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.5548197404 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.3440655166 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.563902418 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.4305522274 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.6477508732 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.2080428665 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.4376921278 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.3106300811 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.4971105137 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.4113463435 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.6260248317 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.5310035709 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.7595845064 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.3031284355 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.5241309352 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3289699508 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.5811203167 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.2483616515 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.5338391625 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.1651229998 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.3923963113 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.3057177881 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.5523945263 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.2711892461 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.5802332073 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.3349110908 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.564806297 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.4574014191 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.6626552528 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0488154154 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.1944904286 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0115014356 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.1082073343 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.2954497906 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.5086877895 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.1582270271 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.4201411039 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.1697291765 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.4073157654 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.139672818 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.4450194819 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.3184042229 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.5712698408 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.3659029431 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.5796195236 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.2791185419 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.5226615992 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.145820804 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.3820953887 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.2313008892 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.4915348458 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.2765024802 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.5261755337 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.2301748885 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.4682741896 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.3346592082 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.6514874668 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.154261694 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.3957095627 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.1177946719 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.341868335 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.2239397579 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.4512212104 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.135748348 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.4408716957 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.2724260509 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.5200202435 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.2753225284 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.5457466615 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.2664966821 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.5386982677 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.2651365589 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.409095006 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.2326358655 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.4815897231 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.2187928356 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.4896578943 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.1666068635 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.4554883841 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.1547742726 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.4534139462 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.2647824193 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.5269086196 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.1635334444 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.3431273828 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.247746183 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.506339637 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.1775009719 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.4300321597 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.2402657185 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.497198112 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.3100527074 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.3696197774 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0872330227 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.3059813913 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.0010116202 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.1893341465 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.3447519877 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.578789784 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.2508560655 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.4987822313 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.2732982319 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.5233285219 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.137657899 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.3935929024 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.2082275626 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.4639776287 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.1350252624 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.4834543859 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.2600500491 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.5029669853 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.2091322046 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.4711774201 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.2256246926 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.4703189943 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.2254492518 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.447826525 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.2986303081 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.5546917725 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.3680194341 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.6778287705 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.1979480779 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.4791457508 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.1536786708 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.4315811907 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.2955515679 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.5399574649 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.2350766648 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.4890671168 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.2583853642 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.5143387984 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.3585971813 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.6255063069 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.1562574059 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.378833839 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.1000795039 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.447037349 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.0981161875 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.3370208163 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.040931235 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.3615428475 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.2626677598 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.4970567085 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.1807466012 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.4219189716 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.3706063992 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.6167676482 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.4234596823 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.5629443923 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.2754265608 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.5207065369 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.3700040895 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.5924241261 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.2971403532 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.5321068893 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.4364286549 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.6509885745 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.27702997 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.5437386483 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.4580925611 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.6514836722 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.2300270544 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.4839384065 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.4236492288 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.6116207052 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.1602143293 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.3793757948 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.1370228414 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.4797772284 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.2571204202 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.4946608155 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.0869374651 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.3119061498 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.1981443603 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.4536105905 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.2278086127 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.4013315084 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.0939343156 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.3129229613 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.074740365 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.4045773842 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.2273071628 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.4497946959 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.1824497409 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.4972329945 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.2541965029 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.5234491687 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.3308712415 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.5512495988 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.1522391036 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.3508292995 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.1514782919 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.4909144205 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.2959760233 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.5398896148 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.4357891553 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.6529723913 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.2501435914 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.5088299265 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.262372343 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.5806899403 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.2450100573 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.4918691312 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.2434733519 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.5120095348 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.3568851036 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.5825326367 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.3112091725 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.5431414206 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.1741933649 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.4272342177 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.1811584685 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.4001890626 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.2544201673 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.5081271409 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.2942923294 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.4479604827 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.0881111208 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.3173214379 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.0384668791 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.1259439982 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.2563119866 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.5291012922 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.3009595898 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.5854044281 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.2567288533 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.5177571061 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.3201007033 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.5323037228 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0115980217 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.115883071 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0129780747 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.1450749981 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.2402951661 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.5033005385 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.2302239803 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.4066956434 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.203750264 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.4979829233 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.2162945849 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.4941278712 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.2537752957 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.5073147534 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.3583753747 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.6253917282 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.1081430594 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.2665454299 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0142970887 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.1489810124 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.1700904158 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.421111634 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.1213993524 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.1655788185 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.1531171972 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.3828830786 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.0483942569 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.3116951706 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0816098185 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.2781732759 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.041496472 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.202397124 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.1783312983 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.4423885999 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.1632112014 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.2297357227 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.249810194 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.5151255506 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.1854861198 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.2463326959 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.1729786376 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.4189697233 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.0866404913 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.3943277627 - }, - { - "model":"google\/gemini-flash-1.5-8b", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.2177971147 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.4738076987 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.1414064724 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.3965739567 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.076595229 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.2493366365 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0440715947 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.2820233612 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.1913062339 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.4296053228 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.13443556 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.2528930204 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.24265587 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.4918380331 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.2104382871 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.456050442 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.2891206499 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.5438550217 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.3184721364 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.5483731849 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.1664804364 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.4585261833 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1299183594 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.3752977557 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.1618648119 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.4104839109 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.2266738862 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.4315390742 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.2602059805 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.4987515978 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.0907943093 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.348768221 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.3277667824 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.5267403611 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.2123273366 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.408906638 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.1880331404 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.4234748209 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.1517877566 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.423956163 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.148851004 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.4514291775 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.3105472783 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.498684126 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.2430984589 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.4969060141 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.1699224465 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.3964402252 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.2907230812 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.5148223626 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.3473636391 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.5442574441 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.3650597419 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.5512750223 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.293824845 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.5724817779 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.1983726871 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.4779908235 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.0792877335 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.3908004248 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.2863884915 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.5641108436 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.352498756 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.5860513143 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.3577876868 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.5943423055 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.5344280565 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.7084649844 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.2914236052 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.5147962724 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.3761179017 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.5590147212 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.452427177 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.6525566656 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.5467976399 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.7780833183 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.3554331718 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.5997743406 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3847830842 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.6191109047 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.2579194729 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.5351839762 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.1571676635 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.4046770996 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.3771043132 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.5835797455 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.3564426025 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.6107274367 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.2846092378 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5655970541 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.5192984544 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.7020040834 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0220051815 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.1861453784 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0227307294 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.1850492522 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.2796934014 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.5155626456 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.1979202011 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.4528880823 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.1595296755 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.3859356797 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.171830216 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.4608354018 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.3520691191 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.6035990708 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.3987037224 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.6195037668 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.2709410734 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.4976144005 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.1150407607 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.3709160058 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.2903150375 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.5392715859 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.3460432788 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.6009670508 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.3377417704 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.5674360496 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.3534620252 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.6680177029 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.1600009223 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.3857586031 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.1602266912 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.4091024664 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.1546473042 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.3985794204 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.1752645287 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.4668449261 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.3356485456 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.5684527887 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.3627134123 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.6050822949 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.2481856237 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.5180749152 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.242508046 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.4046420215 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.3019627022 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.5133980923 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.2316517545 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.5189963647 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.2187004813 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.4910590831 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.2108939118 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.4375825873 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.3274744668 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.5605813039 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.0800539722 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.3336188156 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.2692189197 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.5290912174 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.2869741566 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.5072256514 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.2172591082 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.478962626 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.2169046229 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.3151387909 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0905061152 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.321707617 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.0361920973 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.222315171 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.3100950481 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.558054933 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.2213152575 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.4821662369 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.3126340837 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.5601639768 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.1875297747 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.4394137195 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.2502298144 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.5206889602 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.1301910408 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.4488625613 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.2809005667 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.5466717628 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.2287455417 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.4915489263 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.3395095603 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.5877742809 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.2222923122 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.4572688692 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.3794800258 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.6256125923 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.3593747877 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.664135376 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.251920694 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.4662583176 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.1647980206 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.4166796691 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.3022338928 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.5587522289 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.2252421952 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.4768786292 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.2793746981 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.5246312011 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.3496466203 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.6032151622 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.119086784 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.3898511388 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.0923649849 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.4837931302 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.0417850648 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.2509675066 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0312813941 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.2886309955 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.2613495089 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.5009335042 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.1648455996 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.3943041737 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.4164890636 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.6375470445 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.4166823661 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.575314128 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.2913506513 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.5431985912 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.3723742743 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.5891983505 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.340245547 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.5681284927 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.4702737577 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.6903236014 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.272965046 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.5527916308 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.5195197328 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.6892729705 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.2862936285 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.5264436928 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.4800957551 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.6618495803 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.1701995093 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.4262662427 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.1513262342 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.4732082637 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.2365858071 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.4722212406 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.2251623508 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.4159341653 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.226689844 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.4706510499 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.2258552473 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.4191499082 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.0756830418 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.3184767575 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.0495523985 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.3971096934 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.1915993132 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.4208812642 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.1724511246 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.4741419887 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.245439349 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.536270172 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.3929818488 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.5787667028 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.1784974236 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.4520828188 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.1435021957 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.4868234587 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.3258404036 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.5652149653 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.4264864443 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.6543542662 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.268709657 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.5306834056 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.3395981599 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.6130756934 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.2563448403 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.5007966916 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.2405135195 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.5564984925 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.2787922254 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.5420797212 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.259866454 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.48543634 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.2018690154 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.4335923466 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.0972794658 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.327182503 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.2400131449 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.4851690277 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.383380628 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.5430720239 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.0736674948 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.2974206944 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.0241026131 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.1246172628 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.2757340333 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.5244536559 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.3185578758 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.5765088485 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.2918547905 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.5631912653 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.3283437369 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.5565790802 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0359452883 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1779043042 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0305779168 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.1838354035 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.2595582459 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.5043992681 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.2142625601 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.429749938 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.2595944841 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.5081810113 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.2601189518 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.5225655991 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.2279880384 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.4835933272 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.3478085621 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.5968604742 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.065945115 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.2358663461 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0540055322 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.2390749172 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.246042863 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.4917114856 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.1017188886 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.1707828137 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.167004472 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.400944552 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.066271851 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.3937495329 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0472060067 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.2924612708 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0525309984 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.2304098638 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.2124709579 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.4811646042 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.1392232 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.2205120991 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.1900086584 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.4895930442 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.2395565562 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.3237759485 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.1607803472 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.4377738064 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.1538390263 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.4751516021 - }, - { - "model":"google\/gemma-3-27b-it", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"google\/translate-v2", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.3397504765 - }, - { - "model":"google\/translate-v2", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.560449359 - }, - { - "model":"google\/translate-v2", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.278826715 - }, - { - "model":"google\/translate-v2", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.4815690002 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.2795001892 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.5119662189 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.1719218154 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.4619906072 - }, - { - "model":"google\/translate-v2", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.2955792162 - }, - { - "model":"google\/translate-v2", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.5460142346 - }, - { - "model":"google\/translate-v2", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.2550661243 - }, - { - "model":"google\/translate-v2", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.3516234079 - }, - { - "model":"google\/translate-v2", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.3399225795 - }, - { - "model":"google\/translate-v2", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.5942330704 - }, - { - "model":"google\/translate-v2", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.2816905761 - }, - { - "model":"google\/translate-v2", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.5676475667 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.3493985929 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.5975748844 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.3475330474 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.5776317086 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.2092670256 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.466339127 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1728000073 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.4285875773 - }, - { - "model":"google\/translate-v2", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.2747967998 - }, - { - "model":"google\/translate-v2", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.5090740494 - }, - { - "model":"google\/translate-v2", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.2401570931 - }, - { - "model":"google\/translate-v2", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.4670149488 - }, - { - "model":"google\/translate-v2", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.2674941424 - }, - { - "model":"google\/translate-v2", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.541891802 - }, - { - "model":"google\/translate-v2", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.1979823055 - }, - { - "model":"google\/translate-v2", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.4521218857 - }, - { - "model":"google\/translate-v2", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.3768023433 - }, - { - "model":"google\/translate-v2", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.6041064745 - }, - { - "model":"google\/translate-v2", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.2943485815 - }, - { - "model":"google\/translate-v2", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.5100804178 - }, - { - "model":"google\/translate-v2", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.2273493056 - }, - { - "model":"google\/translate-v2", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.4822061401 - }, - { - "model":"google\/translate-v2", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.1907459838 - }, - { - "model":"google\/translate-v2", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.4304499853 - }, - { - "model":"google\/translate-v2", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.2698506992 - }, - { - "model":"google\/translate-v2", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.5332909304 - }, - { - "model":"google\/translate-v2", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.3805163094 - }, - { - "model":"google\/translate-v2", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.5444910857 - }, - { - "model":"google\/translate-v2", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.3207673833 - }, - { - "model":"google\/translate-v2", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.5578909014 - }, - { - "model":"google\/translate-v2", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.1931718671 - }, - { - "model":"google\/translate-v2", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.4102436779 - }, - { - "model":"google\/translate-v2", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.2940937001 - }, - { - "model":"google\/translate-v2", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.5656960013 - }, - { - "model":"google\/translate-v2", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.4105743367 - }, - { - "model":"google\/translate-v2", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.582274226 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.455106564 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.6691241367 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.5332526559 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.7086055004 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.2827542245 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.5361942504 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.3935667187 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.6110160857 - }, - { - "model":"google\/translate-v2", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.3504448262 - }, - { - "model":"google\/translate-v2", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.6171883377 - }, - { - "model":"google\/translate-v2", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.4493928736 - }, - { - "model":"google\/translate-v2", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.6238587383 - }, - { - "model":"google\/translate-v2", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.4085073951 - }, - { - "model":"google\/translate-v2", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.6323019852 - }, - { - "model":"google\/translate-v2", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.5247754427 - }, - { - "model":"google\/translate-v2", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.7142099767 - }, - { - "model":"google\/translate-v2", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.3267673394 - }, - { - "model":"google\/translate-v2", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.5607576056 - }, - { - "model":"google\/translate-v2", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.3745254965 - }, - { - "model":"google\/translate-v2", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.5676283692 - }, - { - "model":"google\/translate-v2", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.6256942034 - }, - { - "model":"google\/translate-v2", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.7540191814 - }, - { - "model":"google\/translate-v2", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.70888051 - }, - { - "model":"google\/translate-v2", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.8684926816 - }, - { - "model":"google\/translate-v2", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.350374858 - }, - { - "model":"google\/translate-v2", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.5997407835 - }, - { - "model":"google\/translate-v2", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3746387789 - }, - { - "model":"google\/translate-v2", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.6101421618 - }, - { - "model":"google\/translate-v2", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.3295944742 - }, - { - "model":"google\/translate-v2", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.5865092795 - }, - { - "model":"google\/translate-v2", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.2930219204 - }, - { - "model":"google\/translate-v2", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.5297678901 - }, - { - "model":"google\/translate-v2", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.4111750064 - }, - { - "model":"google\/translate-v2", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.6314637291 - }, - { - "model":"google\/translate-v2", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.4316058282 - }, - { - "model":"google\/translate-v2", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.6598411557 - }, - { - "model":"google\/translate-v2", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.3779603397 - }, - { - "model":"google\/translate-v2", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.6286963509 - }, - { - "model":"google\/translate-v2", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.5835846952 - }, - { - "model":"google\/translate-v2", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.7468900473 - }, - { - "model":"google\/translate-v2", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.1585736619 - }, - { - "model":"google\/translate-v2", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.3595908619 - }, - { - "model":"google\/translate-v2", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0281783964 - }, - { - "model":"google\/translate-v2", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.2135990911 - }, - { - "model":"google\/translate-v2", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.3338203117 - }, - { - "model":"google\/translate-v2", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.550900416 - }, - { - "model":"google\/translate-v2", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.2235904654 - }, - { - "model":"google\/translate-v2", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.4889537149 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.3014462049 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.5332346012 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.2941569015 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.5452786239 - }, - { - "model":"google\/translate-v2", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.432237812 - }, - { - "model":"google\/translate-v2", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.6378291521 - }, - { - "model":"google\/translate-v2", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.4001439439 - }, - { - "model":"google\/translate-v2", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.6257483281 - }, - { - "model":"google\/translate-v2", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/translate-v2", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/translate-v2", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/translate-v2", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/translate-v2", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.3369838412 - }, - { - "model":"google\/translate-v2", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.564308487 - }, - { - "model":"google\/translate-v2", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.4344044669 - }, - { - "model":"google\/translate-v2", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.6654473209 - }, - { - "model":"google\/translate-v2", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.3943233817 - }, - { - "model":"google\/translate-v2", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.6168947522 - }, - { - "model":"google\/translate-v2", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.5116660025 - }, - { - "model":"google\/translate-v2", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.730698655 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.3708158915 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.5993132477 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.2705214178 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.504548883 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.3900081426 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.6052122639 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.3336718595 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.5750387432 - }, - { - "model":"google\/translate-v2", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.3278554945 - }, - { - "model":"google\/translate-v2", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.5743022789 - }, - { - "model":"google\/translate-v2", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.3394020951 - }, - { - "model":"google\/translate-v2", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.5938537899 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.3409989486 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.5959288844 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.3711980077 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.4848412412 - }, - { - "model":"google\/translate-v2", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.3673114251 - }, - { - "model":"google\/translate-v2", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.6183652016 - }, - { - "model":"google\/translate-v2", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.3709969529 - }, - { - "model":"google\/translate-v2", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.598464243 - }, - { - "model":"google\/translate-v2", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.3570145905 - }, - { - "model":"google\/translate-v2", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.5887718416 - }, - { - "model":"google\/translate-v2", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.3713033391 - }, - { - "model":"google\/translate-v2", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.604716209 - }, - { - "model":"google\/translate-v2", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.3851664104 - }, - { - "model":"google\/translate-v2", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.6312237305 - }, - { - "model":"google\/translate-v2", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.1874455996 - }, - { - "model":"google\/translate-v2", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.4426393743 - }, - { - "model":"google\/translate-v2", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.3113098415 - }, - { - "model":"google\/translate-v2", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.5501022834 - }, - { - "model":"google\/translate-v2", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.3347737931 - }, - { - "model":"google\/translate-v2", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.5576944014 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.2822808126 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.5526101149 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.2612977966 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.3457225363 - }, - { - "model":"google\/translate-v2", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/translate-v2", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/translate-v2", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/translate-v2", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/translate-v2", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/translate-v2", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/translate-v2", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/translate-v2", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/translate-v2", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.3348942842 - }, - { - "model":"google\/translate-v2", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.5861344551 - }, - { - "model":"google\/translate-v2", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.1311732143 - }, - { - "model":"google\/translate-v2", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.4350789061 - }, - { - "model":"google\/translate-v2", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.2903894802 - }, - { - "model":"google\/translate-v2", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.5623472971 - }, - { - "model":"google\/translate-v2", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.2190660395 - }, - { - "model":"google\/translate-v2", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.5006362228 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.339831623 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.590846484 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.3016318322 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.5461894184 - }, - { - "model":"google\/translate-v2", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.3491068707 - }, - { - "model":"google\/translate-v2", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.5803894973 - }, - { - "model":"google\/translate-v2", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.3274616019 - }, - { - "model":"google\/translate-v2", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.5109521029 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.3962757824 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.6224286451 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.5032472209 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.7257127115 - }, - { - "model":"google\/translate-v2", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.3361081405 - }, - { - "model":"google\/translate-v2", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.5602875655 - }, - { - "model":"google\/translate-v2", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.282320421 - }, - { - "model":"google\/translate-v2", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.4830195157 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.339447252 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.5865985454 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.3120075365 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.5354876043 - }, - { - "model":"google\/translate-v2", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.3500838996 - }, - { - "model":"google\/translate-v2", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.5820135911 - }, - { - "model":"google\/translate-v2", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.3833463355 - }, - { - "model":"google\/translate-v2", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.630764328 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.2752866209 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.5470670325 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.2362408388 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.5649412405 - }, - { - "model":"google\/translate-v2", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.2628008901 - }, - { - "model":"google\/translate-v2", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.5081811686 - }, - { - "model":"google\/translate-v2", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0993493347 - }, - { - "model":"google\/translate-v2", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.456860433 - }, - { - "model":"google\/translate-v2", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.3258874325 - }, - { - "model":"google\/translate-v2", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.5886625327 - }, - { - "model":"google\/translate-v2", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.321631251 - }, - { - "model":"google\/translate-v2", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.5362369434 - }, - { - "model":"google\/translate-v2", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.4482674529 - }, - { - "model":"google\/translate-v2", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.671945393 - }, - { - "model":"google\/translate-v2", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.5160129517 - }, - { - "model":"google\/translate-v2", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.6445374779 - }, - { - "model":"google\/translate-v2", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.3157581247 - }, - { - "model":"google\/translate-v2", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.5682347228 - }, - { - "model":"google\/translate-v2", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.3771434243 - }, - { - "model":"google\/translate-v2", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.6045220423 - }, - { - "model":"google\/translate-v2", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.3489983932 - }, - { - "model":"google\/translate-v2", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.5800455435 - }, - { - "model":"google\/translate-v2", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.5650298473 - }, - { - "model":"google\/translate-v2", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.7365285421 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.3390274579 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.592940935 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.5470657372 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.7020072444 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.3013142128 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.5656623498 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.5178438056 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.6867971436 - }, - { - "model":"google\/translate-v2", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.3354195212 - }, - { - "model":"google\/translate-v2", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.5741211618 - }, - { - "model":"google\/translate-v2", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.3462677897 - }, - { - "model":"google\/translate-v2", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.5833767681 - }, - { - "model":"google\/translate-v2", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.3412028977 - }, - { - "model":"google\/translate-v2", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.5889369863 - }, - { - "model":"google\/translate-v2", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.3852572206 - }, - { - "model":"google\/translate-v2", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.5784169857 - }, - { - "model":"google\/translate-v2", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.2698751119 - }, - { - "model":"google\/translate-v2", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.5340401081 - }, - { - "model":"google\/translate-v2", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.4096366215 - }, - { - "model":"google\/translate-v2", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.5525266748 - }, - { - "model":"google\/translate-v2", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.256568307 - }, - { - "model":"google\/translate-v2", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.4900607089 - }, - { - "model":"google\/translate-v2", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.1305127177 - }, - { - "model":"google\/translate-v2", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.4214140091 - }, - { - "model":"google\/translate-v2", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.3156233999 - }, - { - "model":"google\/translate-v2", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.5490670273 - }, - { - "model":"google\/translate-v2", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.1872166048 - }, - { - "model":"google\/translate-v2", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.4920219369 - }, - { - "model":"google\/translate-v2", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.321113344 - }, - { - "model":"google\/translate-v2", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.5864222708 - }, - { - "model":"google\/translate-v2", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.4884555721 - }, - { - "model":"google\/translate-v2", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.6556325596 - }, - { - "model":"google\/translate-v2", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.352233215 - }, - { - "model":"google\/translate-v2", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.5470765309 - }, - { - "model":"google\/translate-v2", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.1904552367 - }, - { - "model":"google\/translate-v2", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.4562964405 - }, - { - "model":"google\/translate-v2", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.3843214006 - }, - { - "model":"google\/translate-v2", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.6136910044 - }, - { - "model":"google\/translate-v2", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.4689482853 - }, - { - "model":"google\/translate-v2", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.6877930778 - }, - { - "model":"google\/translate-v2", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.4161610215 - }, - { - "model":"google\/translate-v2", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.6455274177 - }, - { - "model":"google\/translate-v2", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.5117468349 - }, - { - "model":"google\/translate-v2", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.7232937985 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.3318364746 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.5738929543 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.3895968702 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.6277778554 - }, - { - "model":"google\/translate-v2", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.4077995927 - }, - { - "model":"google\/translate-v2", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.6409257804 - }, - { - "model":"google\/translate-v2", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.485273374 - }, - { - "model":"google\/translate-v2", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.6710025354 - }, - { - "model":"google\/translate-v2", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.3269754516 - }, - { - "model":"google\/translate-v2", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.5639027355 - }, - { - "model":"google\/translate-v2", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.465639801 - }, - { - "model":"google\/translate-v2", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.597395155 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.2639320429 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.5034191891 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.2139020366 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.3242506245 - }, - { - "model":"google\/translate-v2", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.3693666266 - }, - { - "model":"google\/translate-v2", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.6134245868 - }, - { - "model":"google\/translate-v2", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.3936947375 - }, - { - "model":"google\/translate-v2", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.6497039072 - }, - { - "model":"google\/translate-v2", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.2907704167 - }, - { - "model":"google\/translate-v2", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.5698553329 - }, - { - "model":"google\/translate-v2", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.4401277302 - }, - { - "model":"google\/translate-v2", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.6278421339 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.3080488172 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.5695112482 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.3188563568 - }, - { - "model":"google\/translate-v2", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.5116789278 - }, - { - "model":"google\/translate-v2", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.273125871 - }, - { - "model":"google\/translate-v2", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.5508470442 - }, - { - "model":"google\/translate-v2", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.3030324343 - }, - { - "model":"google\/translate-v2", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.5648891805 - }, - { - "model":"google\/translate-v2", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.3603047797 - }, - { - "model":"google\/translate-v2", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.5927521365 - }, - { - "model":"google\/translate-v2", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.4395780689 - }, - { - "model":"google\/translate-v2", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.6552870615 - }, - { - "model":"google\/translate-v2", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/translate-v2", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/translate-v2", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"google\/translate-v2", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"google\/translate-v2", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.3662176152 - }, - { - "model":"google\/translate-v2", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.5856640284 - }, - { - "model":"google\/translate-v2", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.1500486487 - }, - { - "model":"google\/translate-v2", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.2162606152 - }, - { - "model":"google\/translate-v2", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.3163153725 - }, - { - "model":"google\/translate-v2", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.5712728237 - }, - { - "model":"google\/translate-v2", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.1386875315 - }, - { - "model":"google\/translate-v2", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.469849511 - }, - { - "model":"google\/translate-v2", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.1925403782 - }, - { - "model":"google\/translate-v2", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.4228528325 - }, - { - "model":"google\/translate-v2", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0437670613 - }, - { - "model":"google\/translate-v2", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.2053727616 - }, - { - "model":"google\/translate-v2", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.2311956685 - }, - { - "model":"google\/translate-v2", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.5099861434 - }, - { - "model":"google\/translate-v2", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.281120015 - }, - { - "model":"google\/translate-v2", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.3263629293 - }, - { - "model":"google\/translate-v2", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.3441147842 - }, - { - "model":"google\/translate-v2", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.6121137924 - }, - { - "model":"google\/translate-v2", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.4192686299 - }, - { - "model":"google\/translate-v2", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.4558586669 - }, - { - "model":"google\/translate-v2", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.353693059 - }, - { - "model":"google\/translate-v2", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.607730412 - }, - { - "model":"google\/translate-v2", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.3303018306 - }, - { - "model":"google\/translate-v2", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.5960312224 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.0750313913 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.2689370364 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.1097950919 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.2459305972 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.04291871 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.1992226055 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0012692029 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.1055962738 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.0135029462 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.1510010912 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.0033288372 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.0389705109 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.0592251547 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.2921276604 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.0366276845 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.2393327958 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.0837265107 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.3012065838 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.1120670716 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.2771296913 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.0213908698 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.2206299292 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.0285726559 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.2326683564 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.0290259599 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.2238098591 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.0597935462 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.2388670431 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.0254218054 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.1827114877 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.0012328171 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.1200208328 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.0888105743 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.2644372522 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.0668939667 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.2520473985 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.0148972561 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.2336350172 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.0047574121 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.1922357185 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.0366814427 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.2731193887 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.0173677773 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.2136838993 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.0581882104 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.2702416532 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.0562052656 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.2181774858 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.021858254 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.2327499821 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.0253088472 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.1918662187 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.1238388635 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.3442960257 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.0243559813 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.2777667131 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.0155834504 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.1833348617 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.1279598659 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.1272159331 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.3713000806 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.1249879163 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.332725923 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.1784955678 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.4008799371 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.1782663616 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.4070825897 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.0708485888 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.2987254392 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.0335582401 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.2213744022 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.2797104835 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.4601471921 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.3392404298 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.5518674496 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.1682956348 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.3705606944 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.1595479626 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.4040956812 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.0333586544 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.2582740293 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.0187498765 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.2265924477 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.1122756663 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.3654994366 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.0565617503 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.2914640343 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.1362099506 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.4039646029 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.2151785904 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.4309035319 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0308954874 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.1818653 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0168607588 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.1570666495 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.0426900866 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.2179546047 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.0232938459 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.2007105106 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.0254280801 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.1890793851 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.0144953008 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.1884647114 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.1658307051 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.3676480008 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.1711676323 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.360371738 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.0316097931 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.2354084259 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.0324437189 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.2310907497 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.1200885566 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.3203716958 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.1112414449 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.3175953836 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.1399603895 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.3604113675 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.1312450274 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.3988827234 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.0365286922 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.1990661561 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.0199391634 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.1497878674 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.0448341108 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.252793155 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.0162978516 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.2157203276 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.0992729275 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.3331935567 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.1507170285 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.4062498972 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.1222242366 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.3415186168 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.1607198845 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.2706371796 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.0550016522 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.2468917982 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.017690474 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.2536001746 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.0436064509 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.2149063632 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.0078784523 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.1640159488 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.0075610365 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.1887566902 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.0847212314 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.0397152583 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.2037873099 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.0137546968 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.1710566423 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.0564413481 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.3212656259 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.0674768387 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.1657544717 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.037013395 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.2338208281 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.0011308834 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.1078312824 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.068704384 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.2935036324 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.0888104824 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.2740386167 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.039812774 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.2668100649 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.0365460997 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.2177619304 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.0221745742 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.1989640913 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.016161822 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.1752502983 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.0532822129 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.2301007371 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.022639121 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.175884413 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.0411296617 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.2442292695 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.0482775033 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.2211003771 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.1375979502 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.3518293272 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.1462646527 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.3886405702 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.1 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.2238699363 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.1 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.1606469353 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.0498052059 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.2651327526 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.0286452462 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.2023398596 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.122262497 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.3515428019 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.1527676878 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.4249238432 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.0279631361 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.2321541854 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.0195222834 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.1762015362 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.0127402107 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.1651498064 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0088808864 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.1702476721 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.0366762006 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.1697470704 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.0190635247 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.1343627089 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.2292007848 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.3716655897 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.2126702079 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.3248815955 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.1085949014 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.3346783911 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.1676106101 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.3907504991 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.1013445398 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.3063125264 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.2231857524 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.4401829864 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.0955350175 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.3286351702 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.1871630014 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.3655570607 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.1005466956 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.3356518748 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.1363185356 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.3769111636 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.0189997083 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.1919557381 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.0165320564 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.1417103032 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.0441421075 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.1739314177 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.0182129294 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.119134604 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.0302406554 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.1787247799 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.0014746217 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.0931903615 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.0192199252 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.1851933727 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.016306816 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.1413916659 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.0299103049 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.1986832691 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.0138432 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.1610036541 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.1315135307 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.3968249514 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.1220351802 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.3505317727 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.0605189037 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.2644052383 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.0323301168 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.2153999563 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.1638179638 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.3812064776 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.2333909009 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.4689649165 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.0510060878 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.2132741272 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.0089146903 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.2314109768 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.0303451125 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.1748071119 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.0076177075 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.1986647775 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.1096331511 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.2888090685 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.1206114883 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.2597106436 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.0268228091 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.1751009974 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.0015866917 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.1423262509 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.0382115226 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.2562543067 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.0125933293 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.1659603426 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.0161407336 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.1568869137 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.0241402636 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.0907273605 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.2666411269 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.0495223383 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.276468397 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.1408426214 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.3768513401 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.1029189854 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.3606378352 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.023064469 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.0922502173 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.1015661134 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.0891185343 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.281793335 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.0799576366 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.2661903898 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.0287030985 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.2124751899 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.0040250398 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.1706379305 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.1389065496 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.3708687542 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.1661316612 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.354399593 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0242583204 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.1929988599 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.012814538 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.1551759179 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.0973024735 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.3076531166 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.0161682999 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.0635837055 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.0478834907 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.208762819 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.0095341532 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.150635966 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0080365175 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.1647602539 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.0909640555 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.1043191943 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.3414129274 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.0651436117 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.1002582276 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.1133138775 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.3754190494 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.1672636279 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.232325354 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.0206894768 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.1952546166 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.0113901513 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.1302687289 - }, - { - "model":"gryphe\/mythomax-l2-13b", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.2152631134 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.4621033585 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.1438913245 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.3637806215 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.0488692805 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.2406135335 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0214880279 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.2270659336 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.1203676158 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.3241911739 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.0165994228 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.1058904177 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.2332719546 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.4924788322 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.1544598614 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.403814105 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.2609114367 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.5167379854 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.1932802581 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.4648835751 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.1270864308 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.3882289796 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1366193757 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.3691581345 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.1966421011 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.4161867731 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.1531147508 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.3635575685 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.152657571 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.4132859119 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.0663682991 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.3007728685 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.2739426076 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.5129388019 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.1227671497 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.3306179967 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.130005692 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.3815764307 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.136654027 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.3809883299 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.092815209 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.4056492611 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.2013843536 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.417738842 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.2451140745 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.4883780153 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.1334730215 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.363000921 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.1938367121 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.4674774016 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.2073541352 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.4438396219 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.260902514 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.4589913242 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.2033313823 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.4899690932 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.1866723671 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.3936568086 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.0494661624 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.320713519 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.2435247423 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.501836375 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.1956638929 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.4870538255 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.2685056004 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.506818165 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.3801657831 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.6056477234 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.2197756902 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.4571715629 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.2911113336 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.4641387139 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.2216031518 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.4429472312 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.3322887566 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.5824514758 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.1640783778 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.434749516 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3126220052 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.6038883227 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.2147534918 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.4874495537 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.094833194 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.3292486732 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.3074477197 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.5168064726 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.2380541489 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.552589393 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.2261588318 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.4944996319 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.4626001556 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.6614963779 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0704357087 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.214215478 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0113955269 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.1874434226 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.2250377214 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.4688956519 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.1515937263 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.4251249067 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.0957125553 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.3135046613 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.1387309388 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.383923158 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.3316295853 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.5594083443 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.3598049012 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.5828568956 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.1166136282 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.3855078109 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.0887957809 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.3187695245 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.2237665442 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.4896395702 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.2614626337 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.5280652466 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.2236143729 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.4912948296 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.2910526755 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.6005590773 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.0845382562 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.3303135434 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.1267144204 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.3667670284 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.1301757317 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.3568142061 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.1161068297 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.4147684511 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.2150043089 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.4693148389 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.2998342329 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.5545377546 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.2131020144 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.4845704057 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.1446650781 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.2292145443 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.1840709267 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.4042090141 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.1902389614 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.4796942089 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.1500855 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.4100527329 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.1136442629 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.4236240472 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.1899800627 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.4618900518 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.087831891 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.265907742 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.1797031918 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.446466319 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.1846339038 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.4220658756 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.1932392069 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.4204852284 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.0476115004 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.0790735292 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0505101039 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.2502174391 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.0135029462 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.1682326163 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.2778994313 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.534066621 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.1661930328 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.4001562798 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.212877318 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.4587224182 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.0873487304 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.3409755146 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.1013985932 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.3022487832 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.0611289601 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.3842495071 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.2670990652 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.5220692033 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.1981851908 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.4555314776 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.2152417217 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.4578207034 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.1654073391 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.3941079443 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.2763114217 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.5353874356 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.3315044625 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.6241092077 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.2083780287 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.4466015977 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.0879797246 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.3441521948 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.2353570133 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.4846450712 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.1632876087 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.4158370821 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.2136151785 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.4670269701 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.2202915792 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.5233013945 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.0757269477 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.2848998148 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.0329429353 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.193397393 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.0206783974 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.2005424268 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0171071488 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.239253642 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.1677012885 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.4316686173 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.1202133569 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.3816566526 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.1891048622 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.4238091524 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.2086330089 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.364326938 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.1998083747 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.4667760664 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.3336132898 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.5541703282 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.2256776552 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.4793769886 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.3320822339 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.6132478102 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.1978207058 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.4682392821 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.409911871 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.6078072484 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.1730354472 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.454951133 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.3073459183 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.554890569 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.0841096684 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.2671346741 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.0409000825 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.3185422263 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.1343511225 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.385804118 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.0662090824 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.3030628402 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.1430431721 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.3858131555 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.1602151366 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.357355981 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.0399118136 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.2285191544 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.0269601697 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.3006594703 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.0705893599 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.26834967 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.0988956652 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.3896116232 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.2014976842 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.470196154 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.2922893702 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.504298223 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.1538893384 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.4005055487 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.123816759 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.4480197519 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.2739803344 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.5286596224 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.3468145126 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.6078627072 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.1798653655 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.4290347271 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.1709237193 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.522080463 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.1405088565 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.4088204523 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.2360333676 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.5038093602 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.2507091988 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.5067409185 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.2998213366 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.5112516267 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.1029119511 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.37770665 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.1507872741 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.3945276116 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.2022308722 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.4620737135 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.2592698778 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.4253112122 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.052152068 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.2578037483 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.0237309602 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.0808540368 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.2231619401 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.4653230255 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.2701457179 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.5043061571 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.2437336637 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.5042825637 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.3231504544 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.5440009645 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0461126383 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1577698173 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0008007826 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.1426831674 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.194871957 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.4838690709 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.1908512613 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.3855814375 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.177513842 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.386230097 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.2544813414 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.4948262917 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.2003733128 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.4613442635 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.3326058501 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.5496097026 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0685259305 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.234718691 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0319335459 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.1970529604 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.1538409962 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.4398974306 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.0373503953 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.074080246 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.0707946688 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.2881582575 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.0204932467 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.2690069759 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0339322053 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.2166922882 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0169319822 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.1617793532 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.174955946 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.4554476081 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.1400846872 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.1809201358 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.1869648376 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.4664675781 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.1696786467 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.2325846099 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.0515154857 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.2819357103 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.0804926199 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.3473550746 - }, - { - "model":"meta-llama\/llama-3-70b-instruct", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.2561356588 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.49648558 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.2071032166 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.4109686249 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.1091965593 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.3173514683 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0490503072 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.2649288206 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.1375609672 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.3710873948 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.0526682247 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.1185738392 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.2624553878 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.5118695802 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.2175864677 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.4641969296 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.2766513185 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.5528193738 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.3371708551 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.5607841978 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.1412140088 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.4082636767 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1613157357 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.3896124669 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.2374604323 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.4585263555 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.2300327193 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.43482663 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.220118305 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.4767360664 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.0623766799 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.2986098722 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.3261928856 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.5339015736 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.2330787096 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.4427484336 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.1767618659 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.4088166263 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.1381246624 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.3730280956 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.161760748 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.450732576 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.2170520787 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.4098923096 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.2731033294 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.5159041397 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.2075901182 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.4086862509 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.2623045124 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.512895511 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.3328087961 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.5013967236 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.36364203 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.5712362729 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.2701190878 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.551918321 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.1903904403 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.4687496067 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.0520773173 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.3368230674 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.3416129059 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.5845038999 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.2726323508 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.5283879512 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.3059254014 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.5403551155 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.4588037752 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.6692431614 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.268843518 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.493449014 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.3403168702 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.5086292148 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.5486108614 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.6873139374 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.691450825 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.8407064328 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.3021494986 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.5604145602 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3865320679 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.6126903448 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.3383579693 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.5641679075 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.2069613925 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.4288746449 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.3771163962 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.5827777548 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.3225619014 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.5830090459 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.2924853239 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5462367408 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.4963618411 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.682573515 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0727409119 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.1973579541 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0147511412 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.1172295571 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.3220032872 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.537784261 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.2206953431 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.4990640113 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.1527043255 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.4074071592 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.1567942198 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.4125213011 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.3914590212 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.6128431348 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.4189427376 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.6262550579 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.2338808528 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.4544102907 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.1243598882 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.4051657211 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.2678903597 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.520149627 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.3361911519 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.5583527487 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.290274553 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.5143960108 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.3789023659 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.6751523776 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.1640789976 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.4462743519 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.1964520184 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.3861299089 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.1899251487 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.428775702 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.1861284915 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.4568269097 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.296179579 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.5259369403 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.3187240753 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.5786749514 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.3306727326 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.5648258387 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.262512317 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.4036795798 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.2593989014 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.4825732152 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.2177203514 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.452576603 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.2013677498 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.4740327886 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.2015887265 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.4935901226 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.3098535214 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.5740290935 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.0913702814 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.3128837987 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.2155836452 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.4583181839 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.2468017951 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.4734415865 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.2276258723 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.4577745447 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.3046437152 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.3825631739 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.1034813211 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.2900727397 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.0357350273 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.2486408005 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.3838720489 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.5776118345 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.200683389 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.4892134584 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.2846718719 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.5239506053 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.135802286 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.3994207414 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.1470288737 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.370597998 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.0624257747 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.3667603362 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.271926141 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.5318057054 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.2579782083 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.4806502272 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.2987546297 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.526361525 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.1515083487 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.4332812085 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.3458633411 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.5703964991 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.4050410338 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.6535211779 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.2598947984 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.4881220333 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.2015903544 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.4513077936 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.260901002 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.4896606547 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.1429173328 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.4071639857 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.2472539314 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.4972878378 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.2865823477 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.5850706516 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.0888165228 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.2927704081 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.0527161443 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.3183888298 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.024472012 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.2143061298 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0100859589 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.2528047704 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.285587224 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.5216659729 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.1281773816 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.3720376754 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.4479091606 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.6272058507 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.4155919737 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.5685427433 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.2568309796 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.5058927884 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.3323405641 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.5569354008 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.3183331223 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.5559144449 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.4500659682 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.6761551234 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.2803966495 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.5308215606 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.5346576918 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.7024180686 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.2051262499 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.4837372958 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.3699194641 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.5770913921 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.1806014296 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.4291519278 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.0966721561 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.3133383199 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.2666171334 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.4765001737 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.1168753501 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.3522716786 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.2270309753 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.4841536531 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.1352090178 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.3326584955 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.067782587 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.2594122638 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.0496762437 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.2555516699 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.1103212906 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.3317936338 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.1178727843 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.3706477532 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.2704960778 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.5452851397 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.395555296 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.5799018584 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.253350788 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.458172945 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.1663559354 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.4946292339 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.3079953173 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.5447429639 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.3935950974 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.6342780862 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.2983575506 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.5283269577 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.3087303367 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.6028401745 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.2210855899 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.4616487061 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.2867265678 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.5646798034 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.357367421 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.5659157865 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.3515649071 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.5645706042 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.1896909211 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.4144350541 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.202234159 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.4284203038 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.2422208114 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.4745802383 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.3118363896 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.4892437324 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.0689409767 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.2722531305 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.0406781073 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.0970886698 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.2772171859 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.5118206984 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.3271066365 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.5803660329 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.2451818788 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.4929183421 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.2948409512 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.5143250682 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0677223644 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1661136189 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0419433658 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.1567633534 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.2584265792 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.5020697076 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.2624388601 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.4452300688 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.2640324297 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.5196545965 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.2748329219 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.5341998684 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.2872551102 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.5360250569 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.3575088107 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.5879974234 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0892196115 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.266303191 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0392376693 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.1442620012 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.2174626032 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.4686565248 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.0941401506 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.1690248565 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.0921572696 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.3072945662 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.052810575 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.2387421258 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0706373037 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.2741240176 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0463945559 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.1664955069 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.2098595469 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.4603190119 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.0831701767 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.2065328404 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.2619931658 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.5251502482 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.2725475868 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.3148065512 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.1611133093 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.379825756 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.0702261565 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.3706020457 - }, - { - "model":"meta-llama\/llama-3.1-70b-instruct", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.2279903683 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.4722573355 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.1435174722 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.3991235315 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.1058038471 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.3048469769 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0491812173 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.2808449794 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.1452228976 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.3699202818 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.0630164833 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.1755172285 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.2933386948 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.5447211689 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.2166524228 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.4496497227 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.2997360932 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.552448295 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.2837522278 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.5150814494 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.1559168311 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.4268663808 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1391913129 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.3773300026 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.1936957127 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.4274660929 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.2338775014 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.4546098648 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.2240336457 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.456891102 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.0249457171 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.237299794 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.3606359676 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.5477908661 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.1847580734 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.3911269476 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.1576463626 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.3910005157 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.1690495289 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.3930640761 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.1487255467 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.4407404732 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.2175383868 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.4344749015 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.3061950313 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.5535821276 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.1811004213 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.3534531968 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.2646772038 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.5157826791 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.331255344 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.5093924414 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.3806413844 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.5722776653 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.2863154138 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.5492472281 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.181535472 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.4498383877 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.0712120544 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.3404533027 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.2928040954 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.5416342014 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.3623393932 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.5863625454 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.3129672706 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.5456543979 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.4852409005 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.6908376394 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.2581336709 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.4704308834 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.3342775397 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.5026657233 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.5510215557 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.6909834226 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.6679215449 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.823262947 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.3139331841 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.5454623234 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3561290923 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.599796306 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.3069040556 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.5327832177 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.2185015953 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.4264089038 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.3745780882 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.5785175063 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.2904415478 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.5908280404 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.2963449909 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5544997379 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.5091700689 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.690320784 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0862885919 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.2372420697 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0261732885 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.168027641 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.295613677 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.5221513183 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.1697878702 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.4619303787 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.1704488365 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.4403441536 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.1466885285 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.4179596519 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.385731086 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.6034244629 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.357812342 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.5894721809 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.2543010782 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.4974062413 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.1038893205 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.3250555425 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.276319154 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.5531277158 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.3130631115 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.576837791 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.3143630083 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.5621460006 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.3372718385 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.6543894215 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.1641186812 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.4109013799 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.1375093856 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.3810358014 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.2236376263 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.4348813399 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.1135128656 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.4345857133 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.2934122255 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.5393871714 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.3100077394 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.5714730187 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.284322765 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.5304019177 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.2212745751 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.355950114 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.2779520489 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.5022141687 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.2476437073 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.4805413308 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.1839138217 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.4860524069 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.170404008 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.4549282359 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.2759135194 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.5427685716 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.1237988917 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.3265823778 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.2379701997 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.4807978998 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.2068140088 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.4615143451 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.2508944927 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.4998320266 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.210142355 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.3127505848 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0890010757 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.2623679578 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.0406768013 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.2265562343 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.3934799806 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.6000359011 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.2497480714 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.5149579975 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.2776870629 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.5361437897 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.101231398 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.3867790942 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.1710070826 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.3961869922 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.0586978059 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.3322172345 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.2566290969 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.5196341734 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.2701878605 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.4964908212 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.3224419544 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.5606155155 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.1663847917 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.4383662593 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.3103894957 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.5854645421 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.3913336262 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.6467989318 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.2392486974 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.4920626101 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.2130836675 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.4622075168 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.2893481535 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.5215715176 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.1419402772 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.4014256358 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.249611031 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.4991029967 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.2942599953 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.5860228525 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.0973366086 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.3232614896 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.0552969578 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.3314346183 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.0356994946 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.2438060785 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0183592041 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.3076327609 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.2403245803 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.5023246313 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.1415388613 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.4055113288 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.4519562833 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.6328740374 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.3918546765 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.551810658 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.2474554181 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.490135462 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.3387973296 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.5550868321 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.3065957195 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.5567333989 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.4450524918 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.6712742861 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.2717880574 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.5367497902 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.4990357373 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.6703309998 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.2184907643 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.4824691404 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.3406091079 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.5689518318 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.1780482269 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.4251975218 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.1238628432 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.3651594596 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.2363136631 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.4724628618 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.1012580658 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.3226539734 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.1702954814 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.4314499751 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.1165218233 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.3219658957 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.0415760658 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.2726763268 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.059393934 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.3265574234 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.0974935448 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.337395124 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.0786475166 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.3616383914 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.2468393111 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.497224405 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.4165200238 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.60128551 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.2331294709 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.4605502791 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.1664653883 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.4762879225 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.3181506443 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.5441377883 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.384958542 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.6364660715 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.2373393477 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.5017230165 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.2334583695 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.5603415221 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.2147204762 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.4649686586 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.2451735521 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.5352886898 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.3767393472 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.5870286691 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.3415510311 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.5592933672 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.1801710665 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.4238537274 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.1125461134 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.3456654305 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.2385233061 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.4987667959 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.3202315883 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.4748886274 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.0653357736 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.3152990905 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.0468367135 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.1314286197 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.2786347493 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.5143010521 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.3361615644 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.5732166456 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.2384384134 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.4989290832 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.4068700974 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.5784786574 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0295327628 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.168725075 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.047075079 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.2446929278 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.252958993 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.5053979802 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.2339821201 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.425306622 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.236632122 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.4793696196 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.265421135 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.5116987882 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.2721498467 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.5200799335 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.3613736416 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.5866656133 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0887261142 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.2752257416 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0469317169 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.2008453897 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.1844305556 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.4417239043 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.1248600823 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.1650750126 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.0773908628 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.2972337309 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.0211167911 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.2699477659 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0578164805 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.2633608218 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0591720568 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.1753356197 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.2018552397 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.4546838419 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.1477972133 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.238559837 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.2259180607 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.5258178103 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.2474954475 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.2841722148 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.1511814979 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.3667501588 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.0635700737 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.3690469819 - }, - { - "model":"meta-llama\/llama-3.3-70b-instruct", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.3304277157 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.5443905094 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.1851657228 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.4035662808 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.109249521 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.3018012797 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0601412463 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.3378209538 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.278050095 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.5030892659 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.2056413876 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.319475702 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.3412628716 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.6168944847 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.2079777284 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.4737326062 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.3288891576 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.560493221 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.3370289388 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.5616376735 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.1877842343 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.4430965745 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1822974398 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.3877585115 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.2820784803 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.5005600008 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.1762767818 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.3980584269 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.2678950965 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.5139106802 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.1062771627 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.3373123707 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.3378377362 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.5424277928 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.2039436913 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.413295677 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.2078798411 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.4443945632 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.1864199422 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.4220840798 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.1482839317 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.4568926673 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.2815774482 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.4716025494 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.2239129937 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.4981762083 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.1952639614 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.40510597 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.3130233588 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.5569163893 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.3322667951 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.5140240989 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.3401088117 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.5684561927 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.4507021781 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.6542740054 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.2563123252 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.517826657 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.2261228199 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.4918700987 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.3778406936 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.6293186521 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.4207498261 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.6224036774 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.386239845 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.6110300223 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.4508475568 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.6653203029 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.3208174129 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.5441131834 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.3167289307 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.5086510118 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.5649266234 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.7240609445 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.6160790992 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.8133281991 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.3152773331 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.5548531112 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3335962816 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.5859881472 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.2974755741 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.548510072 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.198877694 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.4016676481 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.3394346908 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.5772205685 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.2961737536 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.5732830973 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.3174175523 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5682518332 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.5173973527 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.7056428374 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0282301718 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.2509358266 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0302470726 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.2143808411 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.336179684 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.5448761462 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.1255076156 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.4301109075 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.2176466652 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.4472887488 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.2048605344 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.4547067722 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.3796955055 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.6056742688 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.382370623 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.5922578575 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.2583771315 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.4968818998 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.116404849 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.355624191 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.2893259192 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.5696888872 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.3664678104 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.6152145331 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.2523550022 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.5458277736 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.3774331947 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.6486159416 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.2004372781 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.4254008414 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.2594342647 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.4939201844 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.1883645682 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.4379611856 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.166061461 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.4725232576 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.2923994901 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.5447705341 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.4037821428 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.6296260979 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.3130983776 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.5536124921 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.3266051607 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.4444671407 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.3146151088 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.5434123174 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.3068652176 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.5696595268 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.2107380254 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.4991705013 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.331371608 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.5825812793 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.3610313078 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.605032383 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.1827077293 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.4206815495 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.273765965 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.5541955864 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.2689874625 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.5150630417 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.2599758467 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.4915631618 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.1778560135 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.2969773205 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0622795727 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.2675764955 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.0381251381 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.2812186233 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.2963258613 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.5478698134 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.2490501232 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.470734921 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.2517024761 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.5071223357 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.1690507631 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.4610312304 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.245769233 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.4441647844 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.1869691691 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.4993277276 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.3697837131 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.598263628 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.2866761532 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.5500074549 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.3125095049 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.5535296132 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.2275791183 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.4569915545 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.345465339 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.5897544047 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.4313143535 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.672362003 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.3075903861 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.578349632 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.1897910105 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.4717519215 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.3557303786 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.5879008408 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.1851047496 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.4783025401 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.2465270522 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.5037852263 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.3149743955 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.5635296931 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.1800815958 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.4077560746 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.0667158921 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.3678415876 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.0536067183 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.2934406231 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0212136796 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.3367052821 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.2884546263 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.5474826718 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.2053276951 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.4215800492 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.4246522462 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.6364852561 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.439726575 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.5806732389 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.2708043929 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.5320865131 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.2827359953 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.5407300006 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.3451568022 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.5845365036 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.4582156105 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.6491023878 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.3130092522 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.5728467895 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.503492173 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.6731092747 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.2567420946 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.5061211552 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.4025875747 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.6083538055 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.1596218382 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.3642420922 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.1505395069 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.4131000516 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.3060556532 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.5236224481 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.2303677631 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.4550652237 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.2505378464 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.4777570712 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.238193304 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.4255249112 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.1176743345 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.3404474685 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.1289978109 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.426186094 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.2374825749 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.4257984603 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.1278040594 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.4360467159 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.2878836927 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.5437304451 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.4038152012 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.5978824564 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.2373274152 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.4608187705 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.1899355362 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.4693159323 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.298223826 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.5684457257 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.3825937295 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.6393626909 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.3425500041 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.5680420054 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.2980763573 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.5924685945 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.2473591284 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.5382867852 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.3068229029 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.5637367471 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.3462186566 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.6055817314 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.3822149946 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.5783618359 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.2141866571 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.4746491206 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.2383903304 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.4750617701 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.2652851581 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.5278626321 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.3718263092 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.5255136074 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.1999962108 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.4183108341 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.1133461632 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.2113810541 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.2978237586 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.5152360665 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.3447394658 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.5850690403 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.2589937034 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.5348601679 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.3544986277 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.5913345073 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0650028377 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1912574022 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0525305732 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.2699878572 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.253549974 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.5235064606 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.2484556869 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.429442787 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.2701010494 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.5241051692 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.3181656056 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.5907906511 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.2670924013 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.5198891912 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.3597766713 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.6081806669 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0741464388 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.2660826012 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0695734356 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.2997632689 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.2653942694 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.4893923691 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.1157826458 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.1765005496 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.1376742076 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.3734228567 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.0864151864 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.4104899998 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0937886749 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.3196766983 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.1093519063 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.3224056963 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.2204568545 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.4894165826 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.1569463992 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.232785021 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.2703289724 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.5473102513 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.317769874 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.3676906362 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.2474518428 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.5019415605 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.19092807 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.4983188666 - }, - { - "model":"meta-llama\/llama-4-maverick", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.1327783313 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.3636961218 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.1557273583 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.3515058711 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.0375398146 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.1986406573 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0015655622 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.0623964125 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.0476085337 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.205897506 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.0110136998 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.0688367427 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.0927070911 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.3300356171 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.1909661669 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.4391780261 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.1548779531 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.3887963415 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.268706305 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.4959259833 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.0374286633 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.276784029 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1363017113 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.3536429421 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.106947781 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.3169488071 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.1479958867 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.3761797641 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.0312538317 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.2922637643 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.0234894436 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.2225963414 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.2171579973 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.456713607 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.1681044686 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.3440820027 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.0298418752 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.2580610439 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.041095966 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.2930729253 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.0556571943 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.3548784075 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.066482798 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.3295363828 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.1569501012 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.4039420627 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.1151073387 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.3126448605 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.095240952 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.3840548344 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.1876367188 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.4310988737 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.1916153649 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.4266920518 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.0532026402 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.2442653709 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.0233838479 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.2071232952 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.0030055044 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.1680771697 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.1169562212 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.4547809891 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.2167266047 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.4629060689 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.2546126219 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.4840060449 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.3171866034 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.5752285995 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.1236158233 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.3922493462 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.1783375751 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.4003787241 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.3642023499 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.5697992815 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.4959810553 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.7232313255 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.1732534835 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.4434970776 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3605235101 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.5849733787 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.0868573088 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.3068232268 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.0883262705 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.3294670602 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.1921595243 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.4381909531 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.2102834142 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.503934087 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.1763652726 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.4428784232 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.3772793055 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.5820724576 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0067893116 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.2007893146 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0132875082 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.1300679396 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.1581139234 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.3992847318 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.1006122628 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.3529986856 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.0310807341 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.2470826922 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.0634023566 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.2955476351 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.2788689746 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.5417455941 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.2369610218 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.3765795877 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.1094117889 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.3715115564 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.0627224628 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.3120135336 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.1148528139 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.3969632133 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.2004789157 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.4541545495 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.0855471394 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.3720740561 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.284365864 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.589202199 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.0344910359 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.2176253825 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.0020930717 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.0498013123 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.0788515324 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.3023201397 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.0177354807 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.2429763441 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.1849581121 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.4562979327 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.2591551222 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.5311194854 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.1462066826 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.4299215293 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.2371087689 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.4064089202 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.1102793601 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.3718502317 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.0999035402 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.3664761129 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.0537848954 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.3139411656 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.0666822222 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.3622322436 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.0753739979 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.3445771251 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.1701733674 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.104202131 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.3590734072 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.0705324379 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.3025321109 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.1107444823 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.3916459404 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.1649626358 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.2656552119 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0539171508 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.2436825008 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.1581276083 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.1658903033 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.4217933103 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.0803859812 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.3220461814 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.1018185799 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.3881585962 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.0840554004 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.3260852936 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.036255172 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.2361171448 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.0369324798 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.3426601677 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.1675392326 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.4161590898 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.101823454 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.3124446375 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.1012625471 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.3580813711 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.0891010327 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.3158325956 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.1211248924 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.389246098 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.2367123999 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.5273473365 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.1262296798 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.3480250641 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.104091386 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.3135377948 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.1488007297 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.4132412315 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.0478735067 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.3069618299 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.1523025562 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.4096891017 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.2469695748 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.524876157 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.0369494139 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.2350129808 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.0015518794 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.1140211549 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.0103766134 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.1821363344 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0003856632 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.0985339751 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.1279029727 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.3504496172 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.1158076498 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.3583374616 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.3550414512 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.5626107823 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.2784963846 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.4121299981 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.1049411882 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.371724232 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.2126550777 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.4754992095 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.1813353123 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.4632560004 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.3450201321 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.5827805827 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.1323104842 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.3747307468 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.2733723845 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.5057937589 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.164734586 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.4400610126 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.2884407046 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.5338739518 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.0202859007 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.2084128437 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.0129709626 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.1407028363 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.0476641683 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.1691869095 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.0111247819 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.151377306 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.0307653909 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.2382457281 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.0240096696 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.154444722 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.0116292791 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.1897831748 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.000876482 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.066397943 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.0221364496 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.2036637198 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.0300378344 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.146034089 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.1551096033 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.4297549368 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.200397515 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.4351193348 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.0631852964 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.3127999721 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.0328870671 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.285042966 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.1749499193 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.4691275614 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.2454574882 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.5348019826 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.1325294802 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.4051925402 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.1631216823 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.4696161488 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.1338729952 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.3640492116 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.1115670494 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.4303510763 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.2374723306 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.4675485501 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.2528444882 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.4794045124 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.00699528 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.2382738034 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.0018437478 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.2225002567 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.1140490825 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.3668015685 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.1317498141 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.2770372268 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.0342955291 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.2066101372 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.0101983319 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.0539238863 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.1209396556 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.3409074931 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.270580586 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.537606561 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.1285741979 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.4057727321 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.1865914948 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.4565231191 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0177160721 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1342994379 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0010895392 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.0283042279 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.1155003818 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.3250077925 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.1195513435 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.3158904676 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.0676473408 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.2672641675 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.0166808106 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.2361978954 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.1452425625 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.3941195385 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.2680553268 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.5055559664 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0218129891 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.2316696377 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0005135911 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.0644762753 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.0721904827 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.3322122834 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.0963078281 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.1506583582 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.0658670408 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.2264957148 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.0240888197 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.1589846026 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0361610953 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.2209617429 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0152526027 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.1066841292 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.1360263411 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.4135302369 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.1427052583 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.2190638456 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.1516445239 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.4517979691 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.2058198052 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.2799124898 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.0158837296 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.2111229219 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.0440027048 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.1856579938 - }, - { - "model":"microsoft\/phi-4", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.1381244544 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.3106084366 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.1319133586 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.2261398681 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.0101349522 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.0727946226 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0087636854 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.0326918009 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.0143438883 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.0651139855 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.0145699741 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.1077126314 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.3303312588 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.0568249639 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.1985159581 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.1428907436 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.3107041775 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.1849770017 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.2932088535 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.0246413933 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.2220905764 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.0462912201 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.2082310898 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.021812522 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.1700035697 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.0687018163 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.213092048 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.0516867052 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.2165108464 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.0097728449 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.1483692036 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.08262787 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.281005553 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.0481979333 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.2232523474 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.0805820584 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.2555758551 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.0296286693 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.1319853113 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.0173366455 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.220616462 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.0065148659 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.1526491803 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.0459721625 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.2229551601 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.0233985631 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.1665184954 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.0355167863 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.2600874171 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.0323184525 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.1970289791 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.0892751266 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.2388608153 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.0416926889 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.1552666429 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.020551822 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.1292684598 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.0744822177 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.0527597248 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.3026154166 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.0777044688 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.3081482084 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.1237340737 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.3601104142 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.1371705946 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.4120757797 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.0837672025 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.2648038016 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.0885028071 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.2007646735 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.3548422361 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.460765953 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.4690424472 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.6788013861 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.1169662945 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.3242693179 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.2556403143 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.4583071754 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.0271486292 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.2182731449 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.0526574176 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.2424108963 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.0762125847 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.2228549327 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.0269063649 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.2401408344 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.0839707225 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.3074010094 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.2478840637 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.4205657928 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0290727628 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.1841843114 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0523495621 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.1231670583 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.0005257422 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.0721265952 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.000262224 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.0118348356 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.0318398305 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.1834830244 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.0320718253 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.1032515167 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.1989310744 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.408792844 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.2107266229 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.3722535388 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.0223204074 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.1959765545 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.022115131 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.1882969266 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.1149995432 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.3052316233 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.1017437337 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.2651695911 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.1251179936 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.3078536626 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.1049757961 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.338086632 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.0185191424 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.1790132896 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.0151653031 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.0537338226 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.0341024751 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.2126115238 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.0221151729 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.1431429685 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.1041933329 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.3064701129 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.1299185029 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.3570513672 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.1170990874 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.3281623219 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.1166577127 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.2303280443 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.0192945074 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.2015068169 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.045857499 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.1778848232 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.0278653757 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.2309769046 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.0264488684 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.1618433519 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.0187368299 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.210610547 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.0070803381 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.0602951272 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.0343738545 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.1971697601 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.012430185 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.0969965616 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.1011791445 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.2665626277 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.0674482283 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.1439352867 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0333812973 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.2140071833 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.013803565 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.0856760144 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.0857349903 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.2682295704 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.0363984536 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.2133514375 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.0831966089 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.3252283455 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.0100264548 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.2049643183 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.0402775114 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.2132968488 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.0178624704 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.1584836987 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.044306682 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.2110608123 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.0101250707 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.1446641679 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.0250471784 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.2097577846 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.034382114 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.2211758055 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.120023798 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.3039131897 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.1137229069 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.3446031673 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.1101780964 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.2424045636 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.0971253665 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.1223804901 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.0522706053 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.2509451803 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.0404811569 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.2098515398 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.1127735687 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.3096427976 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.1171995651 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.39693057 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.0249203424 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.183758763 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.0148302605 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.1329930306 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.0171568718 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.1561109456 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0097264241 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.0452833915 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.0554840251 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.1538079363 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.0033288372 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.036508675 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.3055395757 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.4480585816 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.2165906221 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.3271537328 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.1017362354 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.2782010079 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.1041302213 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.2865629267 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.1031395116 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.3223915745 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.1194174782 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.3618255907 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.0901332073 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.2638668804 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.1408494847 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.2546101322 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.0826481083 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.2913230821 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.1808682916 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.3815777762 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.0012309971 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.0385987025 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.0093358773 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.0457261214 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.0448599501 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.1691371082 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.0527194634 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.0187725283 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.1451005114 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.0072043177 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.0622904587 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.015192186 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.1562018554 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.0146518601 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.0664358997 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.0168598973 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.1350682776 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.0192034206 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.1607323446 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.0538059584 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.2453781212 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.0917605905 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.2668905804 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.0217870696 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.2165031068 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.0464674805 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.2235940604 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.1007032416 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.2719560518 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.1266242057 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.3334538145 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.0422003709 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.253591842 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.0280729387 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.1884927612 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.0366379898 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.1847934746 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.0141355453 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.1724636201 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.1550101498 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.3623113506 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.1217984824 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.2801870917 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.0606788965 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.1688995018 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.0007119113 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.0386741345 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.0853746951 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.3009803927 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.0676677726 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.2051763344 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.0118399471 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.0818929883 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.0131103824 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.1075252941 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.2810155518 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.0683323294 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.2657241512 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.0921333598 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.2640680177 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.1042804602 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.257176459 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0453126073 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1567880475 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.027790575 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.0855724163 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.096255918 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.2490196736 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.0759551519 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.2765897266 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.030810794 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.1804383237 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.0151653031 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.0766086067 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.1578714698 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.3784433754 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.1713340477 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.3260532752 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0354904515 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.1880266806 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0178986288 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.0724032398 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.0482308543 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.2087387992 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.0574307954 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.1098000711 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.0118398272 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.1505828307 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.008719744 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.056668863 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0200324188 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.1553578618 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0095466427 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.0491504248 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.0513648793 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.2577830867 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.119690435 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.1721639976 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.0821079546 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.3164863838 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.1265931852 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.1793067232 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.0115347204 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.1485833844 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.0093856962 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.0608458885 - }, - { - "model":"microsoft\/phi-4-multimodal-instruct", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.1520421573 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.3707336059 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.0325845731 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.1866364833 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.049235994 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.1875704973 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0203716729 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.1964052359 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.0457848104 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.1635760551 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.0043114209 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.0733956093 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.0844832543 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.2899357726 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.0202296618 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.1170466993 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.1314009634 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.3827163755 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.0953897712 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.2894343613 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.0542604747 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.2839533373 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.0103488851 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.1022931459 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.1005104859 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.3241333261 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.0397410561 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.1986373033 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.0370753847 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.2466649661 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.0104943059 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.1703228075 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.1181763987 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.3010924314 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.0774366468 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.232244564 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.0700327695 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.2793919522 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.04780178 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.1986318307 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.0761832692 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.3293090829 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.0940299872 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.3022965125 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.1421502617 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.3249063292 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.044984749 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.1531327249 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.1016298945 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.2980803254 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.0978160022 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.2190252958 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.0784827192 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.3301673127 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.0942154389 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.3318581823 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.0227564483 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.1618637003 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.0041151275 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.052321141 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.1376994092 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.3446006208 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.1126650404 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.3592815418 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.1786073211 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.418923403 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.1529904036 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.4257110482 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.0881428767 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.2731602409 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.1667346071 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.3197259125 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.3166864072 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.5483508218 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.4404172544 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.7231001513 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.1298121807 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.3403579227 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3743863952 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.5971283997 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.1016894588 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.3636401028 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.1695199459 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.3986853323 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.1164300835 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.3215620941 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.1452350029 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.4128118494 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.1078563354 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.3207926618 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.3659011486 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.580998869 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0208055886 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.1775856129 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0303702553 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.1647346597 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.0897802232 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.3334021167 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.0546926081 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.2631919591 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.038636598 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.1770095402 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.0050909961 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.1811657432 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.1271878224 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.3698932868 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.1324625901 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.3233634009 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.0809118708 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.2537010038 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.022242601 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.2086622767 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.1216394809 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.4062520998 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.1488006127 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.3814897068 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.1489627056 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.4172638299 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.2008824981 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.5185852751 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.0505138835 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.1914888261 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.0070198993 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.1690394526 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.0982399037 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.2845447958 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.0238061486 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.2070418144 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.1148524922 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.3722842281 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.250138544 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.5338430631 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.1169165949 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.3638899173 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.1330552123 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.2685952079 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.0823011221 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.2825939861 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.0327082346 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.2319054893 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.044667859 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.2691000298 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.0732644907 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.3296633392 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.0353609299 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.1909025949 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.0077167113 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.1386174808 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.1165534681 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.3877914341 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.1033665849 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.3638806009 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.1314926141 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.3540405018 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.1705869429 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.2612780395 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0158033007 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.1802186885 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.004737288 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.1665989397 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.0968803629 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.3271235347 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.0742021289 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.1811100359 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.0830883828 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.2590209016 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.0473984845 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.2714023791 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.0436065244 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.1616079019 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.0321839146 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.2872152251 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.1039755938 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.3670583743 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.0490353313 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.3016997477 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.1016737952 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.3529445259 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.0980137705 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.3076980329 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.2051934522 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.4174861616 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.1030714956 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.3069420156 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.1062506996 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.3311804385 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.0661499319 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.2408635082 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.1305087747 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.336550146 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.0829900967 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.3039678683 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.1166543201 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.3031041679 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.200304354 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.4780661009 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.0265108253 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.1166554461 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.0293332904 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.1849240696 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.0177577979 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.1411822431 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0101970078 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.1613941454 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.0446786865 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.1562050743 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.0022538162 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.0960982382 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.3158857772 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.5109022919 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.2903521386 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.4758823803 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.1198559998 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.3492711529 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.2480316528 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.4488014348 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.1398701241 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.3034565852 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.3460979115 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.5835851988 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.1130151873 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.3818050844 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.1838222494 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.416344125 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.0947682488 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.3259777135 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.1796274314 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.4360781177 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.0300958323 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.2021438397 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.0097834933 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.1744531846 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.0299661217 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.204544657 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.0009238366 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.0843772457 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.031724087 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.16451202 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.0046124791 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.1085913002 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.017437495 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.142035614 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.0146532378 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.1923028552 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.0516240546 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.153428686 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.0121002424 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.1697462625 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.115711536 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.3597872407 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.1694354423 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.4167060912 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.0407632458 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.2238900502 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.0714251247 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.2764333203 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.2208693059 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.4679683611 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.3234795754 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.5608576982 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.0669163701 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.2784916366 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.0633186191 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.3074668268 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.0917433239 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.3851148557 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.1299606269 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.3428948363 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.1471870965 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.3392441061 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.2060450795 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.3717773766 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.0594528699 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.1988219607 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.1247215313 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.1079848157 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.2801778291 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.1131365873 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.2713637811 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.0118794667 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.1249552242 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.0507921341 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.1097631082 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.3352988316 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.0795019275 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.2884455353 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.0837893895 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.2809963487 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.2215012201 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.4551853935 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0345153294 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1364890072 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0012941396 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.1092334478 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.0522713846 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.3192866676 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.0868686952 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.2859772299 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.0689558305 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.22853185 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.0415402981 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.2227329297 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.0649160569 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.2830042558 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.0350138164 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.2206802597 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0588767323 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.2123528181 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0134096062 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.1594472691 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.0908263331 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.3394219762 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.0645497034 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.0478741208 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.1576256072 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.0235106256 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.1852540612 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0235724586 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.1570768217 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0152437624 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.1214492647 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.0740063452 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.2757375638 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.0872564614 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.1427963743 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.0992800287 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.324429867 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.09185491 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.1769207611 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.0494841031 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.1999996494 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.0196415161 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.1910730769 - }, - { - "model":"mistralai\/mistral-nemo", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.2867784698 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.5037863792 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.2421610142 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.4148106883 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.0787707917 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.218746848 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0187531501 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.1112723085 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.0462314764 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.1434072436 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.0132821079 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.0456848091 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.2054466179 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.4635698598 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.2513408047 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.5078766295 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.278999196 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.5072892325 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.335915232 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.5453940527 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.1240264763 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.3914345538 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1767788852 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.3838449247 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.2249154291 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.4283700551 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.2483121176 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.4611825726 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.1368466985 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.3988973343 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.0337104112 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.2239889311 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.2836423323 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.4827693819 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.1511392088 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.3858872623 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.1923090312 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.3973361244 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.1398741741 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.4023712427 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.1262645615 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.389349888 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.164496362 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.42344822 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.2246336129 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.4406538597 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.1192868334 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.3094007011 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.2557484343 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.4816174974 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.336565743 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.500099888 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.1502069597 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.365503748 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.1518861892 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.3795025844 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.0716255326 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.2354287318 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.0016024995 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.1289259809 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.2895060168 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.5088283599 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.3502843148 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.5851011111 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.33652498 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.5394051209 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.4124704223 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.6500309258 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.2440848305 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.4482066389 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.3683187834 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.5300499022 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.4669071745 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.6351301458 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.5813419207 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.8065247071 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.293714449 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.4892518335 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3400529578 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.6054518089 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.2392739698 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.4813714407 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.2321648572 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.4166342577 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.2811212879 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.4244631944 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.2282664087 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.5153269959 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.2548863763 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5181895957 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.4309072933 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.6122951839 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0365190298 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.1781763265 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0417146562 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.0914072868 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.266641943 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.4689143537 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.1714078748 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.45528413 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.0639113657 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.1707631202 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.0551968249 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.165412979 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.3708164771 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.5780452995 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.3889665973 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.5940361548 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.261843766 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.4806474097 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.0995040783 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.3935925698 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.218205371 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.4561374245 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.2523642916 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.5178642158 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.2714445111 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.5033343062 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.3367134056 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.6204206544 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.0577998278 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.184470268 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.0317976664 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.1311048104 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.097793149 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.2519860373 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.0683877466 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.255829494 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.26428669 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.4902151754 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.3239977856 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.5865126635 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.2208745982 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.4944838309 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.1755723698 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.3733502483 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.1125731148 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.2778916971 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.0861201622 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.3310005151 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.1995238484 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.4335224538 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.172304501 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.4246105774 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.0572277693 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.2158207267 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.0812368695 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.2415284955 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.4761318508 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.2506029382 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.4820610024 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.2390442925 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.4660486517 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.1889249825 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.314876296 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0673075407 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.2078406147 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.0194519734 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.1011298866 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.2922121087 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.5037938788 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.2113094586 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.4865126178 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.2362297066 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.4746614882 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.108285746 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.3934483867 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.0484302224 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.2222317379 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.0220530515 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.2506994166 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.2936668736 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.4940538554 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.1686173343 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.4033123912 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.3097940645 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.5304242832 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.2168719994 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.4555868419 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.3030951939 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.5195567075 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.3655435175 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.6292737269 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.20731642 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.414222781 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.0884087592 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.2678188556 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.2746115511 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.5022730585 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.1969099003 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.4481037581 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.23536401 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.4943928771 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.3235114454 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.5875209718 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.0495608632 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.2045968087 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.0294620037 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.1680044731 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.0200154664 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.1606834413 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0044245595 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.1054952984 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.1001204869 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.2836152046 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.0445164582 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.2337334441 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.4077844252 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.6076754833 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.4459003493 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.5840266721 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.2148436144 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.4670207413 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.2898684366 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.525989117 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.264907032 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.4939362461 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.4563355662 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.6843169799 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.258396409 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.4912290692 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.4713411152 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.6517904546 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.1955652432 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.4654058492 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.2591393679 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.4936043335 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.0562734776 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.212197658 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.0314083234 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.1492631083 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.0890242869 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.2507686532 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.0807676975 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.0084519738 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.151041875 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.0056901248 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.100130068 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.0267041676 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.1555601794 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.018455165 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.1280213362 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.0558797598 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.1672295272 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.0266323434 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.1344659816 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.2165549669 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.4902121608 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.2493024035 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.4391116426 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.1532491466 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.3330342559 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.0762662838 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.2740420072 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.2875970952 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.4962250868 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.382073635 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.6293993104 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.0845702794 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.2799479817 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.1327606257 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.4346855791 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.2777158956 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.4958635491 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.2970897235 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.5394670378 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.3805998732 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.5566308844 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.3811390337 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.5895281984 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.0685553777 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.2845942287 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.0371906835 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.1797173863 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.2079294904 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.4320631023 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.2456928253 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.3989628007 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.0154825384 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.1339811483 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.0278445131 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.2699521486 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.478882362 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.2654978305 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.5487755246 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.2622533206 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.4831695415 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.3002613398 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.4834403722 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0279025481 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.0843438607 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0224775292 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.1057554869 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.2469989894 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.4943114536 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.2844546137 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.477210689 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.1727284585 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.4250344787 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.1817744295 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.4656549066 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.2373174322 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.4521152897 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.2962197342 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.544285644 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0678548322 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.1995976377 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0410008999 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.1410775666 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.1807443545 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.4282740606 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.1030562145 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.1694260317 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.0705660888 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.1999709116 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.005606616 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.0791302868 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0146140319 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.1546761245 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0066574272 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.0720455741 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.1686505919 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.4407452421 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.1561257665 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.2413348415 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.2391219094 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.4993680631 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.2546682455 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.3131256963 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.0409238482 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.1759269251 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.040396663 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.1387970813 - }, - { - "model":"mistralai\/mistral-saba", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.2175149129 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.4527759686 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.1124798847 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.2758121544 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.0499477269 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.161467557 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0061206295 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.1175310591 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.0682401612 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.0028802187 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.0506386945 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.1505032551 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.4086092545 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.1882561377 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.3814760125 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.1878091774 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.4280751788 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.2100749947 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.3864616183 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.093266394 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.311215609 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1039989943 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.2907556954 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.1020177653 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.3338664094 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.1064323135 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.3080022567 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.0979045908 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.3174950846 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.0117862293 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.1847940791 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.2573591397 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.4309874046 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.0851522303 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.2284178182 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.0818689903 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.2636451344 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.0746797577 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.3147526037 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.0989261118 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.3536619814 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.1746758677 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.3604842775 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.1908291186 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.4048132215 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.0320570973 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.1921106676 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.1554049163 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.3557989532 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.2229681692 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.3722031872 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.2130798288 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.3738480621 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.1589337472 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.4334432444 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.037561576 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.1318206471 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.1217595648 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.210101514 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.4301134482 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.362165784 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.5625700474 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.2836619572 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.5189927538 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.3481484827 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.5703115876 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.2310257801 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.4431650209 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.2968021074 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.4607855577 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.4358717425 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.6055153523 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.6254747881 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.7898596498 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.2211802733 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.4673850088 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3141247128 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.5664089061 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.1378005544 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.3634410941 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.1662541754 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.3656154799 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.2598161419 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.4673557809 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.2352273865 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.5423645035 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.2637885864 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.50661739 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.3898419239 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.5983170279 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.030172991 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.1791853335 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0012644122 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.0541809315 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.1879687767 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.3575768224 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.1193068232 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.3258076554 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.0424529379 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.205545596 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.0315047557 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.1862281652 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.2931006661 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.5117710763 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.3454885653 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.5632832845 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.1423442665 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.3938289086 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.0479071398 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.2390532358 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.1466679693 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.4020226017 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.2296543368 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.4623290904 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.1846129963 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.4218789485 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.2363785743 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.5715726858 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.042005649 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.1704312564 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.0060037968 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.1052387436 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.1206947602 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.2893596175 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.0376635554 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.2154020665 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.203133363 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.4421827582 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.2636104621 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.5093890816 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.1660483895 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.4317128373 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.2448276505 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.4102738917 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.1159703103 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.3116167676 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.0803083214 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.3001867634 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.1346426707 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.3961827686 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.138703676 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.4106570721 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.0501718274 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.2070022512 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.0014765966 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.0534187009 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.1342281856 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.3526105747 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.146553268 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.3291051456 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.1737342381 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.4232883693 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.2383832092 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.33001113 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0707102369 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.2324042355 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.1164466909 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.2572289084 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.5125685183 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.1223672825 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.3023512099 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.2098940087 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.443017936 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.0762167285 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.2552721118 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.0777950532 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.2573049595 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.0666322315 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.3626993592 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.2059798463 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.4231363675 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.2059172406 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.4148312305 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.2216993022 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.4221894818 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.1386303624 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.3369477219 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.2392636803 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.4545241599 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.3632780792 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.6254141203 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.0217828279 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.144054989 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.0351771663 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.2287244941 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.1464596557 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.3564056311 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.1517063855 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.4352500122 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.2230448991 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.4584787016 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.2531295878 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.5302964071 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.0696320569 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.2331307278 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.0252073886 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.1703033014 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.0276939955 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.1837469296 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0160221908 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.2144565152 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.083347512 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.2190646209 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.0679191643 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.2873941526 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.3647734864 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.5784168493 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.3897396366 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.5030239884 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.1850936564 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.3999928464 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.2726874239 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.4948927457 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.2070601418 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.4601106145 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.412349088 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.6102742767 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.2142629544 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.4499459763 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.3895465667 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.5708848992 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.1710045162 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.433047449 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.2961144006 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.5257430939 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.057729338 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.2177957601 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.0230572611 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.16449999 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.0484864486 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.1952223401 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.0005975301 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.1227717162 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.0165865489 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.1532087128 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.1038227782 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.0457227327 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.2051868353 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.0141586748 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.1192333436 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.0421597981 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.1665679168 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.0011074127 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.1483863351 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.1814266299 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.4404144211 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.2821064012 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.492442613 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.1500059372 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.385325025 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.0190556256 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.201727165 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.2063172621 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.4245629061 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.2871245672 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.5575367366 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.1106429776 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.3246322884 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.1586725311 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.4486820539 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.0861902503 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.2737902674 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.1910130331 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.4159297845 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.2631133201 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.504043761 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.2496114121 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.4707696336 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.0528559098 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.2067746551 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.0275980154 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.1880963665 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.1533969949 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.3718867563 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.2858310833 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.4323129392 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.0536249593 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.190207113 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.0308454815 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.1776801562 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.4036600408 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.2335702423 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.4992388897 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.1788374332 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.4671147568 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.2348003993 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.4786253942 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0150184554 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1309010161 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0010839978 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.0719045729 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.1854021136 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.4258067424 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.1388133394 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.3685419874 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.1877241389 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.4060591516 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.1165785177 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.4074870036 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.2204611632 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.4366931331 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.2957932526 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.5127606293 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0530966299 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.183305815 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0017326575 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.1225688999 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.1059358827 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.3291955196 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.0116334446 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.0643182856 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.0665909516 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.2275785677 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.0230978994 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.1160311087 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0299390587 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.1474455997 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.0919549448 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.1503741808 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.3985964495 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.1755859315 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.2407951689 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.1386896901 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.413832278 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.2503925306 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.3045084897 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.0365820579 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.1964572986 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.0044447951 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.1846817289 - }, - { - "model":"mistralai\/mistral-small-3.1-24b-instruct", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.0371320408 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.1862601893 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0232552001 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.1816122083 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.0762582721 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.2256183152 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.0155010137 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.111632655 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.1105492032 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.3824462343 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.2049615052 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.4519234477 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.1985490849 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.4116485218 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.3336371818 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.5253002356 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.0933745535 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.3852051191 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.112918589 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.3403937393 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.1051779987 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.3391065166 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.1966274075 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.3960585372 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.0575768902 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.3040332139 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.0290897017 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.2669483396 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.1716262856 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.4261716241 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.1464539147 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.3455385109 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.0835422268 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.3248882933 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.0978478358 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.3301671275 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.1701727662 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.4313256486 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.122889461 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.289913907 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.1281858401 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.3858938936 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.125191978 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.3696701209 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.2520982183 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.4444681724 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.3102736093 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.5214898195 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.2303251904 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.450613459 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.2426993481 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.4712451818 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.2626874911 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.4988486171 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.3809175562 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.6066039572 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.1697631286 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.3535802564 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.2098247736 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.417434594 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.399751444 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.5723628973 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.464784706 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.6741611276 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.2233867986 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.4625939523 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.361048469 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.5667561181 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.1859289486 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.4402215767 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.0783879247 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.2777670309 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.2111316415 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.4343589207 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.1904775276 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.4788196159 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.2014604354 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.4639286173 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.3927528149 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.5874921326 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0768136914 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.1865808917 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0163860397 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.1761153537 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.1323783916 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.3850340086 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.1060744828 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.3580675535 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.0737222138 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.2500218213 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.0721259007 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.2931833463 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.3206299694 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.5380529839 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.292022826 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.5099805952 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.1310248624 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.4057643378 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.0541491102 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.263637102 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.2005919962 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.4635609134 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.2668215975 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.4987236442 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.1674187488 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.4422221563 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.2922430013 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.5510140576 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.0555362323 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.236381065 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.0170051195 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.1450260585 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.1928019801 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.4582860792 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.2733663358 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.5231933614 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.1843295265 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.4175841484 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.247062292 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.3738750801 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.1248080013 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.3323730185 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.1222608237 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.3755543507 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.0542399326 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.2976203376 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.0603448772 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.3011538751 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.0423245128 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.2646605638 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.0030665166 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.1617644115 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.142542051 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.3941172286 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.095018815 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.3690882139 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.1298139392 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.3909547555 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.2582727386 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.3442557032 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.2128159963 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.4639121691 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.1877293722 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.4156064229 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.1522348659 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.4142435328 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.0938946347 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.3116778843 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.0549301185 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.2672873596 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.0463756582 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.3097498513 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.1141453782 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.3477667157 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.1057291821 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.3103268517 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.1480972279 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.3846594696 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.1588893829 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.3218843951 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.217708728 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.449213988 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.3422949582 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.5811761531 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.1184833265 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.2686318029 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.1103891214 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.2937298939 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.192171828 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.4057435234 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.1172467131 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.375586286 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.1863008756 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.4262401563 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.2219365699 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.4997896782 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.0832085938 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.2560508851 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.0245166671 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.1971989167 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.030893556 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.1822055745 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0023241318 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.1765743592 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.0743696949 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.3048835131 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.0867109239 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.3141207717 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.2585423604 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.5140115555 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.3275813302 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.5154143201 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.1873357797 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.4495400323 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.2381984934 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.4592277795 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.2145991028 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.4593715469 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.3539022205 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.5923278871 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.1770834914 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.408612856 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.429961987 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.5972964968 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.1435401219 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.4105586063 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.2886257739 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.5087363637 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.0740797406 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.2542118208 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.0601341974 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.1994352479 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.0285852473 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.2160067741 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.014651722 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.1375629789 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.0391982932 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.1784853107 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.0244050078 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.2037164659 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.0803995043 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.3185143496 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.0756351517 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.3153237514 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.1706373545 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.4266803456 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.2642729747 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.4811936124 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.1231167016 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.3066261581 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.100703346 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.3394192326 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.2316592529 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.4448279614 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.3469148634 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.5817285551 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.2029959378 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.4619706712 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.266835444 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.5363657682 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.0974405375 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.3189571047 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.1013427217 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.40431727 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.2525105285 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.4602158898 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.1953888501 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.4178007058 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.1343983036 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.3632350324 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.2056905071 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.3721609069 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.1967376366 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.4299666079 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.2677982301 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.4953792654 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.1687607729 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.4201489822 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.2722608778 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.4873229562 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.1608191811 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.4194174213 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.1610922206 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.3629853655 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.1209426537 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.3088806755 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.1161087561 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.3498111478 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.1204645669 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.3876320563 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.1940449441 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.4193086485 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.0921504626 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.3200787292 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.0569972002 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.1268003169 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0423997321 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.2100045407 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0262399026 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.1399469356 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.1264279499 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.3856755463 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.0962471892 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.186203302 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.1759566918 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.4501489751 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.2279432688 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.2931038513 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.1133170987 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.3072773582 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.0779267738 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.3132146793 - }, - { - "model":"openai\/gpt-3.5-turbo-0613", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.0878643961 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.2943661311 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0773692656 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.3411692596 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.1621142099 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.4428263457 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.1504890085 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.2959385484 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.2484834927 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.5412047755 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.2278733475 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.4958526675 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.265308921 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.534633443 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.4133349725 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.6096991153 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.1444098549 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.4354187609 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.2001187188 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.4423462053 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.2150495101 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.4509543639 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.2997633261 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.4845814777 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.2491524665 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.4859498791 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.138136017 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.3570715701 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.3489496041 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.5581870497 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.1361506132 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.3888910906 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.1613185111 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.4197419896 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.1539862054 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.4377056399 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.2501533038 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.5157072708 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.1995636011 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.4138783532 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.3114258781 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.5457791444 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.3254574442 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.5346247566 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.399240538 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.6190587277 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.3751238401 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.6143783892 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.3217774713 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.5782452692 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.4138800821 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.6092456527 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.3464595133 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.5867734529 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.477316562 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.6811707635 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.2881859392 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.5283141363 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.2909203719 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.5023060375 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.492992017 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.6758612579 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.6212437369 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.8160680265 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.3073587665 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.5589719771 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.4007144936 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.6428162124 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.3047473913 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.541913317 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.2033525098 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.4483666995 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.3915612434 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.6080295028 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.3199141865 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.588811105 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.301906911 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5589917916 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.4922424861 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.6881055928 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0362803832 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.2412638087 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0590184507 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.2182867648 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.293318961 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.5302757414 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.1439576296 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.4388678133 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.2178256702 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.4424716551 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.2443077504 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.5192699912 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.3513424619 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.5811151557 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.3856423281 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.6039414456 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.2481480247 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.508486097 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.1531335794 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.3911815819 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.2876998483 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.5628772937 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.3576201412 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.6153922032 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.3584275831 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.6013859082 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.3499912941 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.6402792518 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.2060192505 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.4844534641 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.1961860496 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.4729428536 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.2912028765 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.5457371537 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.3511828155 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.6085037742 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.2166158629 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.5075364476 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.2931058111 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.4502993046 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.3528030853 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.5716064196 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.2391235505 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.5839078959 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.2190440582 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.4908662007 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.3214797925 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.5762282439 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.2706398193 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.5473511459 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.1856132097 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.3901344593 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.2760595824 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.523164531 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.3130393907 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.5247440023 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.1776114575 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.4741587712 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.2067364767 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.3116379221 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.3601446012 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.5969111652 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.2296100147 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.4744292053 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.3243424349 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.5709461451 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.1272378515 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.4523606053 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.2917695916 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.5143075365 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.2866152436 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.5663273613 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.3202923873 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.5629214829 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.2000751863 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.4730887312 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.247457636 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.5269197766 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.2884569727 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.5016795899 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.3751686059 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.6197326636 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.4401130744 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.7145000136 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.3303579297 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.549332604 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.2328260511 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.5032600779 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.307994769 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.5645741484 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.2521628085 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.5130367104 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.2706764356 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.5187692381 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.3256395629 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.6002234371 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.2193020818 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.4750942093 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.1102557203 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.4682292826 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.1206079965 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.3822546587 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0450954747 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.3804747142 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.2759362863 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.521953003 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.1651830786 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.4234486928 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.4189225146 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.6647373749 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.4787138393 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.6097836343 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.2802625681 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.5439670195 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.3443088412 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.5816388936 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.3203407715 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.5533544406 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.4490877978 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.6695132668 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.2794082054 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.5408123233 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.4984350217 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.6700105545 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.2317861129 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.5199717777 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.3286463098 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.5704087395 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.3269082527 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.5720782047 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.2945581276 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.4770478865 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.2237147063 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.4928025786 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.2213737985 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.4089512188 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.0991434845 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.3451095887 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.1515589229 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.5015201773 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.2407783488 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.4696462601 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.1992814962 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.4739682422 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.2962617057 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.5718773299 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.3246138439 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.539231236 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.2138222548 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.4678880839 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.1812913523 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.4862460633 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.2736390873 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.5516496981 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.3409932056 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.6325116451 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.3084306564 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.5446161895 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.4087794747 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.669062824 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.2266585274 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.5043938863 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.2800009794 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.5340783161 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.3852899552 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.6247940844 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.3365460818 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.5508261106 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.2949417989 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.5355554723 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.3946124626 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.5531143677 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.2730640179 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.539343275 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.367297377 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.6209268292 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.2800024381 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.5593725229 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.3549515665 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.5522777328 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.2068585944 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.5050627139 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.2927501641 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.484706219 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.2209837875 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.4853024301 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.2062980634 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.5064032134 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.2809055533 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.5421068577 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.4180012555 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.6302564473 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.2097645573 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.4732281256 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.119486019 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.1666195088 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.1280239382 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.360470667 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0762109546 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.2828209251 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.1794305621 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.4543396215 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.1841660038 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.2628923071 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.2064136736 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.5051800847 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.2731019968 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.3107160924 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.278809167 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.5407280723 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.266135659 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.5629331219 - }, - { - "model":"openai\/gpt-4.1", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.2198316321 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.4708151995 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.2234579509 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.4401488964 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.1056657743 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.3249231698 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0618207736 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.340632844 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.1521950168 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.3956387285 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.0953768122 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.2497030659 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.2413546506 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.5235234652 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.2271910382 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.4908497482 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.2566573338 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.5183862763 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.3901123396 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.5771753105 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.1301518556 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.4290918442 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1797566847 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.4271851106 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.1724971212 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.4012455839 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.2496844101 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.4720007075 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.1595843783 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.4348621346 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.1540181476 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.351822758 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.2979811644 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.5341221534 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.2462308641 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.4432887674 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.1840798833 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.4151816693 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.1349305067 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.397143235 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.1644448391 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.4629038808 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.2550498255 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.4544854197 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.2230536146 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.499651958 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.2227847146 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.40695057 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.2819696539 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.5293077213 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.3092254935 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.4661357412 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.365645255 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.5771393179 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.3274189601 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.5630354446 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.1379843601 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.3936670775 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.1018796158 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.3594406238 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.2953385985 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.5543620654 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.342919616 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.5729115023 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.3052927761 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.5504382993 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.4246355556 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.6487523813 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.2853090403 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.5033746216 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.2976764649 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.4568078793 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.5178458342 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.6792020066 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.6631992536 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.8257245236 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.2800331904 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.5328441069 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.3927902573 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.628791549 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.274202443 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.5239221129 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.144419277 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.405746187 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.3517517227 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.5678423102 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.2861570496 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.567530869 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.2950999056 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5638983665 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.5061822417 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.6903823708 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0579371031 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.2407036725 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0338899407 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.1685773285 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.2249525185 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.4726822454 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.1646493878 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.4071725376 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.2293529776 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.4521332467 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.2554536105 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.5371463729 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.3531906075 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.6060071382 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.399293733 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.6132292528 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.2232750657 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.4801269988 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.1922860161 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.4363534921 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.2647815263 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.5197043469 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.3268056763 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.5486126608 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.2559881532 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.5349715693 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.3900018149 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.6494354052 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.1469460203 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.3801695829 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.1926475709 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.4187626054 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.1826483605 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.4686350803 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.1078652833 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.421647984 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.3047636442 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.5385736571 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.3458314466 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.5969984451 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.2100828863 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.4717405627 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.2844229339 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.4435245651 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.2977682173 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.5413323701 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.2537598479 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.5629521778 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.1646050237 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.4508391233 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.2752297553 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.5180256955 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.2132140468 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.5000034068 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.1292151863 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.3392182289 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.2382712271 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.4901100456 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.2584591395 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.4586627531 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.1783139223 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.4481556757 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.3002017818 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.3739762238 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0772118618 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.2815494636 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.001488949 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.1588971491 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.3080966975 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.5628489014 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.2981973224 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.5193764902 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.2802761469 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.5391751615 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.2042851472 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.4615978684 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.243797007 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.4981055966 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.2038296766 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.5453530515 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.2649575888 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.5072138807 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.186903033 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.411527522 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.2358876365 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.4961149155 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.2674122275 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.4442281313 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.3443124421 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.5824988714 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.3763691574 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.6619682382 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.2596129619 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.485235691 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.2234699025 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.4866737746 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.3085593402 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.5486177789 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.2319189577 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.4830752425 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.2639124065 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.5166255119 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.342163716 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.6089275595 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.158564127 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.4086927045 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.1237632416 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.4428640995 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.1266863364 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.3723937215 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0507341481 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.3372593565 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.2495546416 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.505250418 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.1370990235 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.4188964845 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.3898113091 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.6349932626 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.4218934881 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.5699211354 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.2238263799 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.5157447202 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.3376407171 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.5674744623 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.2989764302 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.5575461672 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.4347143661 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.6526848356 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.2365245444 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.5263187531 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.4295443245 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.6303158648 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.2156464838 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.4961661832 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.3525318267 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.5377697887 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.1358779492 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.4047237198 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.2528406351 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.5425926629 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.3052635197 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.5444415164 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.319777613 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.5070316671 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.224754909 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.4942892862 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.1763683901 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.3592673643 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.0579407228 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.3218620552 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.1045487932 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.4627951581 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.2457303069 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.4607096598 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.2208751843 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.499725177 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.2080392025 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.5029005766 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.3955111551 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.5840966612 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.2108703792 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.4463761953 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.1957956536 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.5109625366 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.3026696791 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.5540321116 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.379491342 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.6341129937 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.2373352462 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.4812305289 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.3592771753 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.6196466978 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.2189074797 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.458549356 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.2096038798 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.48158495 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.3847076164 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.5961555843 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.3031970309 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.4938747459 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.1923337483 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.4073259848 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.1777393755 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.4229927395 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.1973765077 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.478302799 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.390011731 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.5192332126 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.0956165324 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.3010660185 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.0201085128 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.1279466164 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.3027350341 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.5488245098 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.3543135567 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.5739783335 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.2739321887 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.523898319 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.3702945368 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.5833117124 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0456473272 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1799246176 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0060102851 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.0643020373 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.2282243664 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.4878680978 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.2701355148 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.4485608146 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.2068814622 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.4820023997 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.1797290418 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.5053214161 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.2800966186 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.5447813345 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.3743034645 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.5977965321 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0698928855 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.2670199291 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0438604879 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.2188199264 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.169474795 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.4233016879 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.1115736327 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.1741292068 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.1777667306 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.4381801577 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.0807523022 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.4027285347 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0871363585 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.3153084592 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0712302827 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.266201042 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.1722143774 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.451617464 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.1362044502 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.2472412788 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.211203078 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.4926704854 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.2808125016 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.3349070044 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.1986426867 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.4568960366 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.228999134 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.5092438205 - }, - { - "model":"openai\/gpt-4.1-mini", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.1996215211 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.452996678 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.1894542228 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.4104759123 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.0772997859 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.2795137394 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0490092548 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.2793012345 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.0821675771 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.3261780265 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.0581954137 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.2133137227 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.2038935703 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.4744865332 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.198427289 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.4466553325 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.2230716751 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.4868000305 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.3431634646 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.5539675011 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.0936861 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.4019740671 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1320458692 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.3969132003 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.147678651 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.3870664018 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.1925682475 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.424235974 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.1549108661 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.3657540248 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.1300687711 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.3850114254 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.2822625676 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.5120051075 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.1956229389 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.4165258378 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.1020059939 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.3685311802 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.1584050367 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.4078207292 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.129620916 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.4193741335 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.1911430477 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.423978547 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.1839199068 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.4589371965 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.1754477624 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.3729187467 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.2104863522 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.4534437048 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.2824626 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.4822940799 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.3067568845 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.522767718 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.2603548365 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.5472674101 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.061613272 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.2738044534 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.0495010223 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.2930209689 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.2480975275 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.4908345188 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.2780131154 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.5272272242 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.282506513 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.5438865496 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.4220387975 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.6440212985 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.2138985353 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.4787400928 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.3347502447 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.5277403226 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.4926163025 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.6598180449 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.5627424753 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.7654936904 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.2400384539 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.4846098061 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.331169359 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.541145091 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.1934774812 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.4555822394 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.1515704996 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.4288811212 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.3049498802 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.531726813 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.2572361601 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.5465796366 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.2245970544 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.4773636644 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.456500631 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.6519350009 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0508716923 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.2061725545 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.001678581 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.0842472305 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.1811700298 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.4450724584 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.1152635411 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.3751269086 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.1042620188 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.353512414 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.1697643488 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.4858315893 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.3100386494 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.5747433617 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.3858833658 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.5991711103 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.1533855474 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.4227840042 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.1131265551 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.3711711494 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.2089476707 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.4868357652 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.2953914361 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.5360583303 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.1907075731 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.4662972265 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.3386484563 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.6376664219 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.0974819198 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.3736857308 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.170722725 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.4039469282 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.1538751748 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.3921570735 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.1222763549 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.3651682861 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.2389788634 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.4950691973 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.3197810714 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.5649240218 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.2240038475 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.4860646744 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.2284065848 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.3753787999 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.1867349669 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.4122967846 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.20591358 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.5231507594 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.1757000759 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.4569938635 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.2608919204 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.495117819 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.1391396286 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.3419293202 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.11776525 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.3296737913 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.1659549387 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.4341818109 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.1728970527 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.4006604704 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.1418786679 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.4096096806 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.2767863837 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.343465352 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.0557337494 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.2661879916 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.010496354 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.2072817599 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.2919442529 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.558994569 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.2232843577 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.4785649547 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.1885764001 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.4905954379 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.1518354017 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.4184448049 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.2031343023 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.4809424331 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.1531227243 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.486171029 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.2649114053 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.5258722646 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.1638513843 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.3671674679 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.1755855974 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.464470709 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.2053629902 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.4466569291 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.2391713081 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.5284921106 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.3851770392 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.6778949951 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.1921402736 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.4286234239 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.226941594 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.4545167964 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.2044174225 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.4393220695 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.204358035 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.4838815717 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.222496921 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.455228974 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.2971608126 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.5809601739 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.0616783152 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.2486430016 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.1096684518 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.4236658223 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.0470010342 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.3053087334 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0354609608 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.3000471846 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.1342178934 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.4186845018 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.0959778877 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.4062370429 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.3790281875 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.5948460259 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.4122107278 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.5709045042 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.1956391774 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.4696282098 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.3089097764 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.5592753275 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.2493437671 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.4876335319 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.3816451478 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.6189446172 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.2159926241 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.485645425 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.4231210461 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.611328256 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.2106935755 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.4916756186 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.2957139688 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.5505026606 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.0983614688 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.3467631983 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.2084502331 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.5081363979 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.183239364 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.4023445581 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.188899922 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.3987821089 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.1067913788 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.3496287521 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.1841725143 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.3632256251 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.0529920463 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.282085967 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.1362552545 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.4625012714 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.1467149035 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.3956649623 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.1685063005 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.4680460244 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.1938115187 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.4876215653 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.301648159 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.5098794037 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.1547225512 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.3915293941 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.1695373764 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.4747320433 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.2691126673 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.4857803464 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.3512121942 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.6095777745 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.1987953868 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.4232825095 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.2845246017 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.5836686109 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.1209729479 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.3863152501 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.2257337081 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.4945472603 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.3081208582 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.5470122853 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.2720935434 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.4603538628 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.1562871243 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.4076252967 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.1559524999 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.4140020888 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.1699563701 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.4279668426 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.3008412738 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.4707696326 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.0460275677 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.2471496791 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.0301094125 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.0934926984 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.1772339365 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.4203181275 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.3130066985 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.5687455638 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.2094411351 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.4706103434 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.3725710921 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.5552868727 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0334143542 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1507136538 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0016606076 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.0502679049 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.1864708336 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.4670816214 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.2648238029 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.4478960511 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.1939396294 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.4361718347 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.1189904742 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.4347992199 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.2392083536 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.5109371286 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.3593480951 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.5929525126 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.0586128965 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.2347632724 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0021532802 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.1133302543 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.160147676 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.391740055 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.0779637528 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.1486256305 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.1080830211 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.337342999 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.0541477061 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.3900223164 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0799412014 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.2678562615 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0384415516 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.2172940187 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.1368607253 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.4397284879 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.1909241711 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.2765267822 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.1574414981 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.4616304665 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.2468189144 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.3136635386 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.1806603372 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.4477026286 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.1983500358 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.4823277126 - }, - { - "model":"openai\/gpt-4.1-nano", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"aeb", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"bleu", - "score":0.2046887048 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"aeb", - "task":"translation_from", - "metric":"chrf", - "score":0.3905043974 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"bleu", - "score":0.2440190587 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"aeb", - "task":"translation_to", - "metric":"chrf", - "score":0.4467530618 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"af", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"af", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"af", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"af", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"af", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.1088055906 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.2952376966 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.04860361 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.2749922921 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.080495827 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.287512266 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.0649609212 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.203944936 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.2086617902 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.4774317011 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.2730334942 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.5458981435 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.2530052174 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.5158812138 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.3439536667 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.5691908832 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.1377297001 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.4304104417 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1906837255 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.3931621016 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.1573943285 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.4039837102 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.2385684611 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.4690487202 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.1786795263 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.4382834543 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.142030089 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.3706217658 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.2061194828 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.38382712 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.2351754729 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.4412955741 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.1025818924 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.355623252 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.1290294373 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.3807908275 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"be", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"be", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"be", - "task":"translation_from", - "metric":"bleu", - "score":0.116157646 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"be", - "task":"translation_from", - "metric":"chrf", - "score":0.4411553165 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"be", - "task":"translation_to", - "metric":"bleu", - "score":0.2284052455 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"be", - "task":"translation_to", - "metric":"chrf", - "score":0.4432025312 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.1573424376 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.3813908093 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.1860567167 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.3798747224 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bm", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bm", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bm", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bm", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bm", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.2047894665 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.4476643899 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.3413387194 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.5056140066 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ca", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ca", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ca", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ca", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ca", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.3321604587 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.524735789 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.399945485 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.6275070378 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ckb", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ckb", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"bleu", - "score":0.0862560502 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ckb", - "task":"translation_from", - "metric":"chrf", - "score":0.2788047314 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"bleu", - "score":0.0359802782 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ckb", - "task":"translation_to", - "metric":"chrf", - "score":0.2225612749 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.2777777551 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.5317009045 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.2755276023 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.4907555325 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.2840890109 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.5146969249 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.3999539422 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.6267391818 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.2595428958 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.4813680319 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.3306804036 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.4976939797 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.5232930808 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.6688775695 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.6469796865 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.8203785308 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.2793939864 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.5176409834 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.4118937163 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.6353341411 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.2052699799 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.4764669046 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.2131911377 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.4147480093 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.3062563146 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.4925975136 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.32039199 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.5717901387 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.2706688563 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.5148499232 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.4808374237 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.6855290209 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0270875349 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.2100353402 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.052858761 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.1950018354 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.2245042279 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.4426786034 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.1920269509 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.4643025206 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.1154893286 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.3792147754 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.2191612695 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.4879764503 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.3473235908 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.5515454754 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.3991894826 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.6121310121 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.247888062 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.4353918541 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.1626119723 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.4423709529 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ht", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ht", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ht", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ht", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ht", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.2640028594 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.524505973 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.3929863672 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.5880857849 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.2522725561 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.5212732474 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.2850030055 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.5970450995 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.1326727529 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.3646478687 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.1882093096 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.4009607044 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ilo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ilo", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"bleu", - "score":0.1988516559 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ilo", - "task":"translation_from", - "metric":"chrf", - "score":0.405478436 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"bleu", - "score":0.0961457593 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ilo", - "task":"translation_to", - "metric":"chrf", - "score":0.4060794313 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.2746808629 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.5180176469 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.3112912727 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.5712680542 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.2363319461 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.4826308954 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.2593036542 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.4231415642 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.2480055389 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.4685108662 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.2241033812 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.5113817494 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ki", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ki", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ki", - "task":"translation_from", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ki", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ki", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.147911394 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.3985376686 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.290182238 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.5572310551 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.2772807862 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.504897576 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.1404234583 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.3189837953 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.1849035655 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.430576325 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.2444722013 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.4742295195 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.2013601575 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.4475607863 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.1980395856 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.2888993735 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"lua", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"lua", - "task":"translation_from", - "metric":"bleu", - "score":0.042643493 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"lua", - "task":"translation_from", - "metric":"chrf", - "score":0.2339244707 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"lua", - "task":"translation_to", - "metric":"bleu", - "score":0.0143208425 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"lua", - "task":"translation_to", - "metric":"chrf", - "score":0.2382431413 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.2373436047 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.4564427975 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.2754056305 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.5123611693 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.2027297928 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.4495211176 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.1639594712 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.4482904829 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.2035781185 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.4598803974 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.1964079195 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.5179064416 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.2246064108 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.4311975246 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.2434125045 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.4971145063 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.1756463826 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.3823527701 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.2340922946 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.4581322597 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.2993296846 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.5743132494 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.3504238332 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.6154153931 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.1741885177 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.449774491 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.2106778 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.4610458467 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.2483418024 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.4432537254 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.2305902219 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.5017217229 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.2523126947 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.4772912105 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.3639443469 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.6252850371 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.0835095719 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.2797853634 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.0779315192 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.4121236337 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.0511068522 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.2702934215 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0724907554 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.345324531 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.1164462601 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.3729006132 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.1379248705 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.4079943111 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.3640514137 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.5847142015 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.3988016179 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.5745254523 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.2319955399 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.4971130964 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.293769398 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.535772663 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.3014354397 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.5315937202 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.4535066637 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.6773057972 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"qu", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.2532461677 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.5201960699 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.5052082065 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.6686611337 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.2270580453 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.5034759488 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.3258505825 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.5592402358 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"rw", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"rw", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"rw", - "task":"translation_from", - "metric":"bleu", - "score":0.0829790682 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"rw", - "task":"translation_from", - "metric":"chrf", - "score":0.337986391 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"rw", - "task":"translation_to", - "metric":"bleu", - "score":0.2129352292 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"rw", - "task":"translation_to", - "metric":"chrf", - "score":0.5084793087 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.15024418 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.3597265355 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.2369214411 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.4711257499 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.0980707024 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.3109100287 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.1934430032 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.3560526886 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.0582100604 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.3075785834 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.1011833785 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.4367282377 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.1531795055 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.3616443224 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.2049307012 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.4719724156 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.2199024767 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.4907562634 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.3830980295 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.5736359642 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.2379030124 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.4403417868 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.2238060743 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.5243303769 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.2852268785 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.5304479976 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.3829618265 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.6326982198 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.222064455 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.4652246692 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.299635051 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.5860066036 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.1407382127 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.3831149186 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.214481784 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.4692538776 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.3646122831 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.5746253001 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.3003064302 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.5444122929 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"tg", - "task":"classification", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"tg", - "task":"translation_from", - "metric":"bleu", - "score":0.1128962774 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"tg", - "task":"translation_from", - "metric":"chrf", - "score":0.3493465213 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"tg", - "task":"translation_to", - "metric":"bleu", - "score":0.1841235337 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"tg", - "task":"translation_to", - "metric":"chrf", - "score":0.417497165 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.2392194968 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.4936638572 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.3622208845 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.5010514821 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ti", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ti", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ti", - "task":"translation_from", - "metric":"bleu", - "score":0.0408372058 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ti", - "task":"translation_from", - "metric":"chrf", - "score":0.2042987422 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ti", - "task":"translation_to", - "metric":"bleu", - "score":0.0231278614 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ti", - "task":"translation_to", - "metric":"chrf", - "score":0.0849541719 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.2621174982 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.5019200442 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.3816990204 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.5999237379 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.2245951815 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.4480996711 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.3852002404 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.5711778517 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"umb", - "task":"classification", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"umb", - "task":"translation_from", - "metric":"bleu", - "score":0.0415789397 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"umb", - "task":"translation_from", - "metric":"chrf", - "score":0.1350551103 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"umb", - "task":"translation_to", - "metric":"bleu", - "score":0.0010517421 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"umb", - "task":"translation_to", - "metric":"chrf", - "score":0.0968599255 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.2436356521 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.4877029713 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.2603784132 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.4734427307 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.1411472616 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.3855156193 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.2021458884 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.4930438511 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.1995232614 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.4582270744 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.2597310259 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.514972808 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"wo", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"wo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"wo", - "task":"translation_from", - "metric":"bleu", - "score":0.08218909 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"wo", - "task":"translation_from", - "metric":"chrf", - "score":0.2632475474 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"wo", - "task":"translation_to", - "metric":"bleu", - "score":0.0383287658 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"wo", - "task":"translation_to", - "metric":"chrf", - "score":0.1924695915 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.159437398 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.373213248 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.1199632327 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.1706758411 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"xh", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"xh", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"xh", - "task":"translation_from", - "metric":"bleu", - "score":0.158569201 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"xh", - "task":"translation_from", - "metric":"chrf", - "score":0.3926886149 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"xh", - "task":"translation_to", - "metric":"bleu", - "score":0.0591321886 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"xh", - "task":"translation_to", - "metric":"chrf", - "score":0.3427023375 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.079919346 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.279919938 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0720231313 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.2187010976 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.1290283283 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.3797298683 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.1636055441 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.2449501177 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.1773725218 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.458620733 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.2753136513 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.3274827604 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.9 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.1866603918 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.4108538087 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.164847197 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.4524644478 - }, - { - "model":"openai\/gpt-4o-mini", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.055487033 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.195788708 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.1124541522 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.0818660054 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.2443718379 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.0312578478 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.0892192454 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.1879703279 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.4082307283 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.2032527408 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.4252508109 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.1511448079 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.3482345089 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.2660157525 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.4467729024 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.0827758372 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.2694627987 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1091045232 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.3272460856 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.1266156847 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.3088203065 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.1843867072 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.3542134538 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.1057754271 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.2879318321 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.0399332596 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.1436097331 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.2082859775 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.3721700071 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.1319221867 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.3256562506 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.0834540968 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.2549210714 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.0664704876 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.2928996174 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.1339563491 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.2272649787 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.1668638517 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.3029135903 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.1789816822 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.3618057577 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.1498239832 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.2860628236 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.2518844765 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.4578479284 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.1735279991 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.3732578771 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.1571790351 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.3555264704 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.2188910218 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.3725672408 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.1899770324 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.3001055745 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.3043234734 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.4952039495 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.1310803496 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.2985898996 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.1645861291 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.2473851762 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.1962899792 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.316041879 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.5755388881 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.7200533933 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.1318044964 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.3364780931 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.2564170555 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.4382398152 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.1533507904 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.3596802703 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.1539918473 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.3118556242 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.2008801988 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.3714765993 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.1940537936 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.4792247969 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.1824127171 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.3884840193 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.3876504315 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.5389108131 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.0738447366 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0240639493 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.1911668884 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.1949542031 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.4052894246 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.0646513332 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.1693257519 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.0392475471 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.126618872 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.0445527444 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.1499587951 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.2268010617 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.3688527647 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.2230471235 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.4142622149 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.1156575532 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.2236151918 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.0698561479 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.2305830236 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.1691753276 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.3383017469 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.1871269984 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.3731318657 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.1585058297 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.3050841055 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.1943793424 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.4973073268 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.0963109471 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.2727236688 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.0660035863 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.158376548 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.1542082331 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.3028124272 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.2460472209 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.5145531621 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.1612360434 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.3349199354 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.117016066 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.208990655 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.1729476776 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.3959364431 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.1785425051 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.4249603279 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.1793896966 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.4090639994 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.1405729124 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.3820186042 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.0698832994 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.2284330377 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.0851826028 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.1882485322 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.1743539627 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.4083936939 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.1166173259 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.2305285039 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.1358227204 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.3423557444 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.1953069902 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.2564265013 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.2314152421 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.4725672887 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.17835674 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.3229842432 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.1533461204 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.3472008961 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.1168236528 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.2271783619 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.0928957375 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.244366675 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.0070995906 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.1262437392 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.1971269045 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.3735961781 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.1323978127 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.1893359682 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.2135948303 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.3958565999 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.1503233282 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.275821329 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.1307137096 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.2920741112 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.3539116395 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.5832656935 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.1255424452 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.329335139 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.0516414641 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.1567420369 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.2023089106 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.3806028698 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.0980312706 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.264649599 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.1993799127 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.4072508102 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.213081855 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.4114277627 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.0393547699 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.163604057 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.0139872791 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.1446751186 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.0330655518 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.1646544216 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0093749808 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.1074800017 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.223399015 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.4518680174 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.0545741621 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.1291918248 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.1485025023 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.3380088662 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.1374014148 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.2051503897 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.1586103513 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.302194795 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.3897966488 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.5766535228 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.137323181 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.3095327986 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.2669467187 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.3969322178 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.1792721327 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.4176277039 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.3094152813 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.4953887976 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.1584004696 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.38671906 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.3118021035 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.4665016839 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.1090127159 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.2748895651 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.1361757276 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.3342529311 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.1417180597 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.3566626666 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.0411134672 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.1347885554 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.0397889862 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.1881103773 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.0165581412 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.1414877053 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.0815647227 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.2487364334 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.0253075503 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.2274689496 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.1882164689 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.3943730373 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.2381466467 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.4346543613 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.15780181 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.3437437662 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.1075072996 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.4294170504 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.1975945861 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.374398795 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.345880422 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.5436350308 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.1913501957 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.3785864037 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.1219661246 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.3307175909 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.1483607311 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.3430470513 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.1257952581 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.2720024162 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.378185741 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.557354327 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.1301924452 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.2509083971 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":1.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.147695853 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.345415746 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.1876537733 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.3005344914 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.2078229702 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.3841741235 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.147060653 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.3761003189 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.1363197738 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.2845774158 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.3118632296 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.4917979058 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.1623147303 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.3145943461 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.2226311966 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.4414805706 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.0849999362 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.2406274728 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.279180562 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.5116111495 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.1911759573 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.3812712354 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.2038650525 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.4035183237 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.1435587328 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.3490715453 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.1113617435 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.171370503 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0366725514 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.2007464145 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0113521992 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.0657310926 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.1121528943 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.3178178007 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.1508604775 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.2175191576 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.1039910991 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.2728656752 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.2326991429 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.2529625335 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.121119706 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.2540648952 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.0235611585 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.1930902578 - }, - { - "model":"qwen\/qwen3-235b-a22b", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.0050031284 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.0924942363 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.0116797169 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.0585141671 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.0764700894 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.2647322624 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.0882851827 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.2833576827 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.106692739 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.295973969 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.0607816225 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.1277246917 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.0362905586 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.1081740165 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1172745059 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.2687874448 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.0438294169 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.108019816 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.0673513704 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.2079164994 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.0514848851 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.1736145704 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.0238993213 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.1411125068 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.1396565072 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.2428031494 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.0383901491 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.1005302975 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.0278489988 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.1190175818 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.0367390088 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.0930542371 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.0736017029 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.1960588462 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.0585115493 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.1332846728 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.0429848247 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.2044845821 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.0299885561 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.0829092043 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.1804319747 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.3101350547 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.0701534813 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.1430960661 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.1415195376 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.288893664 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.1392279949 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.2551496147 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.1564210937 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.3072042217 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.2457069766 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.3367028296 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.0578542202 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.1674077812 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.0663537525 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.1816240149 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.2474773351 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.3070994171 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.4019192682 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.4782905978 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.1341566102 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.2765542043 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.1161792768 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.2919898174 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.0817283606 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.241409878 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.0476911924 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.1464645136 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.1279159996 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.2829362826 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.0388514243 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.1409030042 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.134253193 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.2989125898 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.2077048384 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.2946343811 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0181579676 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.0543819937 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0084320884 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.0454173788 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.0897072533 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.200957971 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.0377543414 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.0111815534 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.1020769257 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.021314569 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.0830626431 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.1220519036 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.2639335507 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.0973415259 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.2016493248 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.0349119748 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.1556602705 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.0494255017 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.1669634575 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.1211482424 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.2505103528 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.164473668 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.3043231306 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.0833080649 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.2451270085 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.1631993738 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.3566219474 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.0073674163 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.0518830276 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.0292915569 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.0528593127 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.1046004559 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.2507392983 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.1255584711 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.2268219009 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.0941710304 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.2704334257 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.0992512617 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.1515216003 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.1016751568 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.2254142889 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.1238616028 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.3669735224 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.0534454977 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.193605134 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.0265149039 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.1928985788 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.0889265704 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.2173396783 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.0392373629 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.0711012835 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.1937626852 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.0417334285 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.2140485243 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.0725025436 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.07710948 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.0771251025 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.2350655007 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.1021996092 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.2179221866 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.0444196112 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.2032069734 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.0626942847 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.1962655319 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.0065978992 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.0423907689 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.0220679536 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.0589969179 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.1351914125 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.2585829871 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.0901612807 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.1634932067 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.0940283278 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.2412937356 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.1354857092 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.0831447868 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.2593808275 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.2740792798 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.4472159864 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.0693332571 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.1429077445 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.0671076397 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.2039597866 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.0167754523 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.1541036377 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.1500475481 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.2732969464 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.2819860484 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.4877091208 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.0158298608 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.116087277 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.0604762339 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.0477354473 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.0430498724 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.0224903847 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.1205338978 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.0083749469 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.0240423066 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.1058521796 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.2379060391 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.0340105109 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.1018981548 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.1200562696 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.2833586847 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.0585253067 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.226893054 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.0734185487 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.2234046866 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.2994720627 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.4412841692 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.0737479957 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.2334413367 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.2671240661 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.4582064143 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.0655622212 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.2295301444 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.1421377727 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.3364832122 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.0729593007 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.1686728011 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.0254323861 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.0173772616 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.094294748 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.0249272374 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.1318435849 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.0114625376 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.0309494652 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.018362811 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.0903053603 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.0361908088 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.0697403266 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.2326685525 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.0901351217 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.1761295618 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.0358186402 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.1577931474 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.0657495832 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.2377800817 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.1439757229 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.3280954777 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.1315707916 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.2532056747 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.0822395206 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.2293817888 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.0711409402 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.2295116951 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.0281826938 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.0907703349 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.2172508025 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.0867988442 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.2789247181 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.0817852216 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.1305309896 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.134883333 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.2942401793 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.1382385998 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.250580016 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.0975469561 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.2592386604 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.11477212 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.2025044003 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.1035672471 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.2255615453 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.1161482705 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.2453264465 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.069654902 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.2336481279 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.0972829087 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.2111334793 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.0562684736 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.2112928198 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.039901967 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.2071350414 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.092781215 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.2596321396 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.0743041275 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.1172524094 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.0559568244 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.0188433826 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.0807367939 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.2250049533 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.1178746954 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.1300639553 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.1028766672 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.2670830369 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.1623451886 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.2013735709 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.0409146661 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.1675889915 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.0850348967 - }, - { - "model":"qwen\/qwen3-30b-a3b", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ak", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ak", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ak", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ak", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ak", - "task":"translation_from", - "metric":"bleu", - "score":0.0197191428 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ak", - "task":"translation_from", - "metric":"chrf", - "score":0.1664518353 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ak", - "task":"translation_to", - "metric":"bleu", - "score":0.022953237 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ak", - "task":"translation_to", - "metric":"chrf", - "score":0.1745004402 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"am", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"am", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"am", - "task":"mgsm", - "metric":"accuracy", - "score":0.2 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"am", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"am", - "task":"translation_from", - "metric":"bleu", - "score":0.0796218409 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"am", - "task":"translation_from", - "metric":"chrf", - "score":0.2147354921 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"am", - "task":"translation_to", - "metric":"bleu", - "score":0.010900097 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"am", - "task":"translation_to", - "metric":"chrf", - "score":0.0657137696 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"am", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"apc", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"apc", - "task":"translation_from", - "metric":"bleu", - "score":0.132513614 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"apc", - "task":"translation_from", - "metric":"chrf", - "score":0.3795883854 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"apc", - "task":"translation_to", - "metric":"bleu", - "score":0.137952669 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"apc", - "task":"translation_to", - "metric":"chrf", - "score":0.3264835371 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ar", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ar", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ar", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ar", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ar", - "task":"translation_from", - "metric":"bleu", - "score":0.1656993834 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ar", - "task":"translation_from", - "metric":"chrf", - "score":0.3867250082 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ar", - "task":"translation_to", - "metric":"bleu", - "score":0.2206826239 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ar", - "task":"translation_to", - "metric":"chrf", - "score":0.4063080067 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ary", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ary", - "task":"translation_from", - "metric":"bleu", - "score":0.1079587982 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ary", - "task":"translation_from", - "metric":"chrf", - "score":0.3785381059 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ary", - "task":"translation_to", - "metric":"bleu", - "score":0.1218401593 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ary", - "task":"translation_to", - "metric":"chrf", - "score":0.3257594737 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"arz", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"arz", - "task":"translation_from", - "metric":"bleu", - "score":0.1042685601 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"arz", - "task":"translation_from", - "metric":"chrf", - "score":0.3470585369 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"arz", - "task":"translation_to", - "metric":"bleu", - "score":0.1401256855 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"arz", - "task":"translation_to", - "metric":"chrf", - "score":0.3746457154 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"as", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"as", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"as", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"as", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"as", - "task":"translation_from", - "metric":"bleu", - "score":0.12200123 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"as", - "task":"translation_from", - "metric":"chrf", - "score":0.3327938776 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"as", - "task":"translation_to", - "metric":"bleu", - "score":0.0166926581 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"as", - "task":"translation_to", - "metric":"chrf", - "score":0.0857862708 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"awa", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"awa", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"awa", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"awa", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"awa", - "task":"translation_from", - "metric":"bleu", - "score":0.2393773898 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"awa", - "task":"translation_from", - "metric":"chrf", - "score":0.480146856 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"awa", - "task":"translation_to", - "metric":"bleu", - "score":0.13553124 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"awa", - "task":"translation_to", - "metric":"chrf", - "score":0.3086397875 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"az", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"az", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"az", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"az", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"az", - "task":"translation_from", - "metric":"bleu", - "score":0.0984273348 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"az", - "task":"translation_from", - "metric":"chrf", - "score":0.2705806557 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"az", - "task":"translation_to", - "metric":"bleu", - "score":0.0899956365 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"az", - "task":"translation_to", - "metric":"chrf", - "score":0.3292198004 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"bho", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"bho", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"bho", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"bho", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"bho", - "task":"translation_from", - "metric":"bleu", - "score":0.1944585572 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"bho", - "task":"translation_from", - "metric":"chrf", - "score":0.4135649539 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"bho", - "task":"translation_to", - "metric":"bleu", - "score":0.0841754475 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"bho", - "task":"translation_to", - "metric":"chrf", - "score":0.2832267135 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"bn", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"bn", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"bn", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"bn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"bn", - "task":"translation_from", - "metric":"bleu", - "score":0.1905773039 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"bn", - "task":"translation_from", - "metric":"chrf", - "score":0.3974640862 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"bn", - "task":"translation_to", - "metric":"bleu", - "score":0.1738721227 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"bn", - "task":"translation_to", - "metric":"chrf", - "score":0.3341823126 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ceb", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ceb", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ceb", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ceb", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"bleu", - "score":0.2913417198 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ceb", - "task":"translation_from", - "metric":"chrf", - "score":0.4715886747 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"bleu", - "score":0.2042487615 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ceb", - "task":"translation_to", - "metric":"chrf", - "score":0.3926894761 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"cs", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"cs", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"cs", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"cs", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"cs", - "task":"translation_from", - "metric":"bleu", - "score":0.1724601448 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"cs", - "task":"translation_from", - "metric":"chrf", - "score":0.3857736694 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"cs", - "task":"translation_to", - "metric":"bleu", - "score":0.2045983077 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"cs", - "task":"translation_to", - "metric":"chrf", - "score":0.4057824257 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"de", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"de", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"de", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"de", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"de", - "task":"translation_from", - "metric":"bleu", - "score":0.2174481184 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"de", - "task":"translation_from", - "metric":"chrf", - "score":0.4124810034 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"de", - "task":"translation_to", - "metric":"bleu", - "score":0.3022742815 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"de", - "task":"translation_to", - "metric":"chrf", - "score":0.4968168009 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"el", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"el", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"el", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"el", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"el", - "task":"translation_from", - "metric":"bleu", - "score":0.1714951139 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"el", - "task":"translation_from", - "metric":"chrf", - "score":0.3572714199 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"el", - "task":"translation_to", - "metric":"bleu", - "score":0.1874209861 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"el", - "task":"translation_to", - "metric":"chrf", - "score":0.3148917242 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"en", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"en", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"en", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"en", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"en", - "task":"translation_from", - "metric":"bleu", - "score":0.446850518 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"en", - "task":"translation_from", - "metric":"chrf", - "score":0.5288962517 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"en", - "task":"translation_to", - "metric":"bleu", - "score":0.6607457062 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"en", - "task":"translation_to", - "metric":"chrf", - "score":0.828511917 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"en", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"es", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"es", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"es", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"es", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"es", - "task":"translation_from", - "metric":"bleu", - "score":0.2056231855 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"es", - "task":"translation_from", - "metric":"chrf", - "score":0.4306632094 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"es", - "task":"translation_to", - "metric":"bleu", - "score":0.279042145 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"es", - "task":"translation_to", - "metric":"chrf", - "score":0.5148983586 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fa", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fa", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fa", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fa", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fa", - "task":"translation_from", - "metric":"bleu", - "score":0.0657228626 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fa", - "task":"translation_from", - "metric":"chrf", - "score":0.3139715852 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fa", - "task":"translation_to", - "metric":"bleu", - "score":0.1461056975 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fa", - "task":"translation_to", - "metric":"chrf", - "score":0.3475439511 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fil", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fil", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fil", - "task":"translation_from", - "metric":"bleu", - "score":0.2438433878 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fil", - "task":"translation_from", - "metric":"chrf", - "score":0.4512223379 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fil", - "task":"translation_to", - "metric":"bleu", - "score":0.2208119792 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fil", - "task":"translation_to", - "metric":"chrf", - "score":0.4378335772 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fr", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fr", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fr", - "task":"translation_from", - "metric":"bleu", - "score":0.1721857235 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fr", - "task":"translation_from", - "metric":"chrf", - "score":0.477225501 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fr", - "task":"translation_to", - "metric":"bleu", - "score":0.4037533819 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fr", - "task":"translation_to", - "metric":"chrf", - "score":0.6067640163 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fuv", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"bleu", - "score":0.0224194954 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fuv", - "task":"translation_from", - "metric":"chrf", - "score":0.105046056 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"fuv", - "task":"translation_to", - "metric":"chrf", - "score":0.082276319 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"gu", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"gu", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"gu", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"gu", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"gu", - "task":"translation_from", - "metric":"bleu", - "score":0.1698272846 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"gu", - "task":"translation_from", - "metric":"chrf", - "score":0.3349276506 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"gu", - "task":"translation_to", - "metric":"bleu", - "score":0.0740380781 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"gu", - "task":"translation_to", - "metric":"chrf", - "score":0.2296558189 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ha", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ha", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ha", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ha", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ha", - "task":"translation_from", - "metric":"bleu", - "score":0.0341558033 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ha", - "task":"translation_from", - "metric":"chrf", - "score":0.1515090956 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ha", - "task":"translation_to", - "metric":"bleu", - "score":0.0178215481 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ha", - "task":"translation_to", - "metric":"chrf", - "score":0.206138289 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ha", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"hi", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"hi", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"hi", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"hi", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"hi", - "task":"translation_from", - "metric":"bleu", - "score":0.220218347 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"hi", - "task":"translation_from", - "metric":"chrf", - "score":0.4387911559 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"hi", - "task":"translation_to", - "metric":"bleu", - "score":0.2024184343 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"hi", - "task":"translation_to", - "metric":"chrf", - "score":0.4252839653 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"hne", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"hne", - "task":"translation_from", - "metric":"bleu", - "score":0.1375555656 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"hne", - "task":"translation_from", - "metric":"chrf", - "score":0.3016838615 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"hne", - "task":"translation_to", - "metric":"bleu", - "score":0.1141821718 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"hne", - "task":"translation_to", - "metric":"chrf", - "score":0.3531692508 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"hu", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"hu", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"hu", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"hu", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"hu", - "task":"translation_from", - "metric":"bleu", - "score":0.201485104 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"hu", - "task":"translation_from", - "metric":"chrf", - "score":0.4060774974 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"hu", - "task":"translation_to", - "metric":"bleu", - "score":0.1845697152 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"hu", - "task":"translation_to", - "metric":"chrf", - "score":0.3651920542 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"id", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"id", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"id", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"id", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"id", - "task":"translation_from", - "metric":"bleu", - "score":0.2119242961 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"id", - "task":"translation_from", - "metric":"chrf", - "score":0.4027870816 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"id", - "task":"translation_to", - "metric":"bleu", - "score":0.2845893115 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"id", - "task":"translation_to", - "metric":"chrf", - "score":0.5618854988 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ig", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ig", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ig", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ig", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ig", - "task":"translation_from", - "metric":"bleu", - "score":0.0631617801 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ig", - "task":"translation_from", - "metric":"chrf", - "score":0.2207616259 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ig", - "task":"translation_to", - "metric":"bleu", - "score":0.0508365473 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ig", - "task":"translation_to", - "metric":"chrf", - "score":0.1697158135 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"it", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"it", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"it", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"it", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"it", - "task":"translation_from", - "metric":"bleu", - "score":0.1840631549 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"it", - "task":"translation_from", - "metric":"chrf", - "score":0.4301865089 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"it", - "task":"translation_to", - "metric":"bleu", - "score":0.2482222138 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"it", - "task":"translation_to", - "metric":"chrf", - "score":0.5228204728 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ja", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ja", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ja", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ja", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ja", - "task":"translation_from", - "metric":"bleu", - "score":0.1127930596 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ja", - "task":"translation_from", - "metric":"chrf", - "score":0.2787849105 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ja", - "task":"translation_to", - "metric":"bleu", - "score":0.1840671906 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ja", - "task":"translation_to", - "metric":"chrf", - "score":0.3135227124 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"jv", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"jv", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"jv", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"jv", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"jv", - "task":"translation_from", - "metric":"bleu", - "score":0.1834764341 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"jv", - "task":"translation_from", - "metric":"chrf", - "score":0.3297107768 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"jv", - "task":"translation_to", - "metric":"bleu", - "score":0.1569316995 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"jv", - "task":"translation_to", - "metric":"chrf", - "score":0.467165329 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"kk", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"kk", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"kk", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"kk", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"kk", - "task":"translation_from", - "metric":"bleu", - "score":0.1096372066 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"kk", - "task":"translation_from", - "metric":"chrf", - "score":0.2865411962 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"kk", - "task":"translation_to", - "metric":"bleu", - "score":0.1388075288 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"kk", - "task":"translation_to", - "metric":"chrf", - "score":0.3285307881 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"km", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"km", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"km", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"km", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"km", - "task":"translation_from", - "metric":"bleu", - "score":0.170745871 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"km", - "task":"translation_from", - "metric":"chrf", - "score":0.3490488807 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"km", - "task":"translation_to", - "metric":"bleu", - "score":0.0792740607 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"km", - "task":"translation_to", - "metric":"chrf", - "score":0.2285805687 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"kn", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"kn", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"kn", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"kn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"kn", - "task":"translation_from", - "metric":"bleu", - "score":0.1679784179 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"kn", - "task":"translation_from", - "metric":"chrf", - "score":0.3620246212 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"kn", - "task":"translation_to", - "metric":"bleu", - "score":0.1242897501 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"kn", - "task":"translation_to", - "metric":"chrf", - "score":0.3007681742 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ko", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ko", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ko", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ko", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ko", - "task":"translation_from", - "metric":"bleu", - "score":0.1276543618 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ko", - "task":"translation_from", - "metric":"chrf", - "score":0.3677051571 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ko", - "task":"translation_to", - "metric":"bleu", - "score":0.1980497946 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ko", - "task":"translation_to", - "metric":"chrf", - "score":0.2107568779 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mag", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mag", - "task":"translation_from", - "metric":"bleu", - "score":0.2450449733 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mag", - "task":"translation_from", - "metric":"chrf", - "score":0.457594122 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mag", - "task":"translation_to", - "metric":"bleu", - "score":0.198485011 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mag", - "task":"translation_to", - "metric":"chrf", - "score":0.3848787397 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mai", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mai", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mai", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mai", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mai", - "task":"translation_from", - "metric":"bleu", - "score":0.1927814544 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mai", - "task":"translation_from", - "metric":"chrf", - "score":0.3935457095 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mai", - "task":"translation_to", - "metric":"bleu", - "score":0.0565208468 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mai", - "task":"translation_to", - "metric":"chrf", - "score":0.3131063701 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mg", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mg", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mg", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mg", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mg", - "task":"translation_from", - "metric":"bleu", - "score":0.0354662811 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mg", - "task":"translation_from", - "metric":"chrf", - "score":0.1686638218 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mg", - "task":"translation_to", - "metric":"bleu", - "score":0.0314143451 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mg", - "task":"translation_to", - "metric":"chrf", - "score":0.2728599885 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ml", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ml", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ml", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ml", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ml", - "task":"translation_from", - "metric":"bleu", - "score":0.2371717296 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ml", - "task":"translation_from", - "metric":"chrf", - "score":0.4148173757 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ml", - "task":"translation_to", - "metric":"bleu", - "score":0.1193636287 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ml", - "task":"translation_to", - "metric":"chrf", - "score":0.2090448587 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mr", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mr", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mr", - "task":"translation_from", - "metric":"bleu", - "score":0.1181276928 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mr", - "task":"translation_from", - "metric":"chrf", - "score":0.3044523516 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mr", - "task":"translation_to", - "metric":"bleu", - "score":0.0922160441 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"mr", - "task":"translation_to", - "metric":"chrf", - "score":0.2284498534 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ms", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ms", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ms", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ms", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ms", - "task":"translation_from", - "metric":"bleu", - "score":0.1968422851 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ms", - "task":"translation_from", - "metric":"chrf", - "score":0.4053316305 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ms", - "task":"translation_to", - "metric":"bleu", - "score":0.3766906478 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ms", - "task":"translation_to", - "metric":"chrf", - "score":0.6371848492 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"my", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"my", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"my", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"my", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"my", - "task":"translation_from", - "metric":"bleu", - "score":0.1225410694 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"my", - "task":"translation_from", - "metric":"chrf", - "score":0.3037176244 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"my", - "task":"translation_to", - "metric":"bleu", - "score":0.1060809306 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"my", - "task":"translation_to", - "metric":"chrf", - "score":0.2215712232 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ne", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ne", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ne", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ne", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ne", - "task":"translation_from", - "metric":"bleu", - "score":0.1375199333 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ne", - "task":"translation_from", - "metric":"chrf", - "score":0.3471041134 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ne", - "task":"translation_to", - "metric":"bleu", - "score":0.1115971998 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ne", - "task":"translation_to", - "metric":"chrf", - "score":0.2915076183 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"nl", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"nl", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"nl", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"nl", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"nl", - "task":"translation_from", - "metric":"bleu", - "score":0.2385777935 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"nl", - "task":"translation_from", - "metric":"chrf", - "score":0.46025989 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"nl", - "task":"translation_to", - "metric":"bleu", - "score":0.2726117583 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"nl", - "task":"translation_to", - "metric":"chrf", - "score":0.5059991136 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ny", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ny", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ny", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ny", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ny", - "task":"translation_from", - "metric":"bleu", - "score":0.0351245421 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ny", - "task":"translation_from", - "metric":"chrf", - "score":0.1823407405 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ny", - "task":"translation_to", - "metric":"bleu", - "score":0.0228348515 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ny", - "task":"translation_to", - "metric":"chrf", - "score":0.2201854752 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"om", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"om", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"om", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"om", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"om", - "task":"translation_from", - "metric":"bleu", - "score":0.008627568 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"om", - "task":"translation_from", - "metric":"chrf", - "score":0.190294404 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"om", - "task":"translation_to", - "metric":"bleu", - "score":0.0084651752 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"om", - "task":"translation_to", - "metric":"chrf", - "score":0.1971638266 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"or", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"or", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"or", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"or", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"or", - "task":"translation_from", - "metric":"bleu", - "score":0.1927446862 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"or", - "task":"translation_from", - "metric":"chrf", - "score":0.3892188652 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"or", - "task":"translation_to", - "metric":"bleu", - "score":0.0747658241 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"or", - "task":"translation_to", - "metric":"chrf", - "score":0.2195369005 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pa", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pa", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pa", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pa", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pa", - "task":"translation_from", - "metric":"bleu", - "score":0.3258973448 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pa", - "task":"translation_from", - "metric":"chrf", - "score":0.4626835685 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pa", - "task":"translation_to", - "metric":"bleu", - "score":0.2107798391 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pa", - "task":"translation_to", - "metric":"chrf", - "score":0.3008568297 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pl", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pl", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pl", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pl", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pl", - "task":"translation_from", - "metric":"bleu", - "score":0.1854293513 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pl", - "task":"translation_from", - "metric":"chrf", - "score":0.4307605073 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pl", - "task":"translation_to", - "metric":"bleu", - "score":0.2420005385 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pl", - "task":"translation_to", - "metric":"chrf", - "score":0.4288325052 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ps", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ps", - "task":"mgsm", - "metric":"accuracy", - "score":0.5 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ps", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pt", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pt", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pt", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pt", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pt", - "task":"translation_from", - "metric":"bleu", - "score":0.2177730164 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pt", - "task":"translation_from", - "metric":"chrf", - "score":0.433438268 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pt", - "task":"translation_to", - "metric":"bleu", - "score":0.3402518575 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"pt", - "task":"translation_to", - "metric":"chrf", - "score":0.4997252818 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ro", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ro", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ro", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ro", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ro", - "task":"translation_from", - "metric":"bleu", - "score":0.1593298949 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ro", - "task":"translation_from", - "metric":"chrf", - "score":0.4017223467 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ro", - "task":"translation_to", - "metric":"bleu", - "score":0.2726504789 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ro", - "task":"translation_to", - "metric":"chrf", - "score":0.4406178765 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ru", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ru", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ru", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ru", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ru", - "task":"translation_from", - "metric":"bleu", - "score":0.1869622361 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ru", - "task":"translation_from", - "metric":"chrf", - "score":0.4301337345 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ru", - "task":"translation_to", - "metric":"bleu", - "score":0.2546688585 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ru", - "task":"translation_to", - "metric":"chrf", - "score":0.3959108821 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sd", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sd", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sd", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sd", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sd", - "task":"translation_from", - "metric":"bleu", - "score":0.1610657464 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sd", - "task":"translation_from", - "metric":"chrf", - "score":0.3043802738 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sd", - "task":"translation_to", - "metric":"bleu", - "score":0.0315314884 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sd", - "task":"translation_to", - "metric":"chrf", - "score":0.2120098132 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"si", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"si", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"si", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"si", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"si", - "task":"translation_from", - "metric":"bleu", - "score":0.0130905001 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"si", - "task":"translation_from", - "metric":"chrf", - "score":0.1973781543 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"si", - "task":"translation_to", - "metric":"bleu", - "score":0.0134555536 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"si", - "task":"translation_to", - "metric":"chrf", - "score":0.1176319627 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sn", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sn", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sn", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sn", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sn", - "task":"translation_from", - "metric":"bleu", - "score":0.0330243636 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sn", - "task":"translation_from", - "metric":"chrf", - "score":0.1963889628 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sn", - "task":"translation_to", - "metric":"bleu", - "score":0.0428214603 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sn", - "task":"translation_to", - "metric":"chrf", - "score":0.2352543457 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"so", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"so", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"so", - "task":"mgsm", - "metric":"accuracy", - "score":0.3 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"so", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"so", - "task":"translation_from", - "metric":"bleu", - "score":0.0329294407 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"so", - "task":"translation_from", - "metric":"chrf", - "score":0.1742241015 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"so", - "task":"translation_to", - "metric":"bleu", - "score":0.0243956065 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"so", - "task":"translation_to", - "metric":"chrf", - "score":0.1607978429 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sr", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sr", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sr", - "task":"translation_from", - "metric":"bleu", - "score":0.1604574347 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sr", - "task":"translation_from", - "metric":"chrf", - "score":0.4084047683 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sr", - "task":"translation_to", - "metric":"bleu", - "score":0.1664454505 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sr", - "task":"translation_to", - "metric":"chrf", - "score":0.34203002 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"su", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"su", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"su", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"su", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"su", - "task":"translation_from", - "metric":"bleu", - "score":0.1257406217 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"su", - "task":"translation_from", - "metric":"chrf", - "score":0.3378715267 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"su", - "task":"translation_to", - "metric":"bleu", - "score":0.0667296519 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"su", - "task":"translation_to", - "metric":"chrf", - "score":0.3980540266 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sv", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sv", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sv", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sv", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sv", - "task":"translation_from", - "metric":"bleu", - "score":0.1672894127 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sv", - "task":"translation_from", - "metric":"chrf", - "score":0.3684788102 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sv", - "task":"translation_to", - "metric":"bleu", - "score":0.3614873089 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sv", - "task":"translation_to", - "metric":"chrf", - "score":0.5392220773 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sw", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sw", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sw", - "task":"mgsm", - "metric":"accuracy", - "score":0.8 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sw", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sw", - "task":"translation_from", - "metric":"bleu", - "score":0.1200999603 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sw", - "task":"translation_from", - "metric":"chrf", - "score":0.2908221442 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sw", - "task":"translation_to", - "metric":"bleu", - "score":0.023689627 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sw", - "task":"translation_to", - "metric":"chrf", - "score":0.2453164021 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"sw", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ta", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ta", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ta", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ta", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ta", - "task":"translation_from", - "metric":"bleu", - "score":0.1882602024 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ta", - "task":"translation_from", - "metric":"chrf", - "score":0.4059862729 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ta", - "task":"translation_to", - "metric":"bleu", - "score":0.0841932466 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ta", - "task":"translation_to", - "metric":"chrf", - "score":0.260536888 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"te", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"te", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"te", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"te", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"te", - "task":"translation_from", - "metric":"bleu", - "score":0.3422950731 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"te", - "task":"translation_from", - "metric":"chrf", - "score":0.4841100904 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"te", - "task":"translation_to", - "metric":"bleu", - "score":0.1765162745 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"te", - "task":"translation_to", - "metric":"chrf", - "score":0.3133803312 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"th", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"th", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"th", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"th", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"th", - "task":"translation_from", - "metric":"bleu", - "score":0.196772439 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"th", - "task":"translation_from", - "metric":"chrf", - "score":0.4191567084 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"th", - "task":"translation_to", - "metric":"bleu", - "score":0.2773920621 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"th", - "task":"translation_to", - "metric":"chrf", - "score":0.4482220675 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"tr", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"tr", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"tr", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"tr", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"tr", - "task":"translation_from", - "metric":"bleu", - "score":0.2102634926 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"tr", - "task":"translation_from", - "metric":"chrf", - "score":0.4159372483 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"tr", - "task":"translation_to", - "metric":"bleu", - "score":0.2364464274 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"tr", - "task":"translation_to", - "metric":"chrf", - "score":0.4789525721 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"uk", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"uk", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"uk", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"uk", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"uk", - "task":"translation_from", - "metric":"bleu", - "score":0.1579324347 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"uk", - "task":"translation_from", - "metric":"chrf", - "score":0.3647294785 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"uk", - "task":"translation_to", - "metric":"bleu", - "score":0.3006688281 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"uk", - "task":"translation_to", - "metric":"chrf", - "score":0.468364849 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ur", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ur", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ur", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ur", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ur", - "task":"translation_from", - "metric":"bleu", - "score":0.1705686173 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ur", - "task":"translation_from", - "metric":"chrf", - "score":0.3937623183 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ur", - "task":"translation_to", - "metric":"bleu", - "score":0.1711380057 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"ur", - "task":"translation_to", - "metric":"chrf", - "score":0.3502063066 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"uz", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"uz", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"uz", - "task":"mgsm", - "metric":"accuracy", - "score":0.6 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"uz", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"uz", - "task":"translation_from", - "metric":"bleu", - "score":0.1315627205 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"uz", - "task":"translation_from", - "metric":"chrf", - "score":0.3801109933 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"uz", - "task":"translation_to", - "metric":"bleu", - "score":0.1184994967 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"uz", - "task":"translation_to", - "metric":"chrf", - "score":0.3552530055 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"vi", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"vi", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"vi", - "task":"mgsm", - "metric":"accuracy", - "score":0.7 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"vi", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"vi", - "task":"translation_from", - "metric":"bleu", - "score":0.2826147232 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"vi", - "task":"translation_from", - "metric":"chrf", - "score":0.5352078445 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"vi", - "task":"translation_to", - "metric":"bleu", - "score":0.2891587891 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"vi", - "task":"translation_to", - "metric":"chrf", - "score":0.5030568081 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"wuu", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"bleu", - "score":0.1923006081 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"wuu", - "task":"translation_from", - "metric":"chrf", - "score":0.3850610484 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"bleu", - "score":0.0704239199 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"wuu", - "task":"translation_to", - "metric":"chrf", - "score":0.1309128692 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"yo", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"yo", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"yo", - "task":"mgsm", - "metric":"accuracy", - "score":0.1 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"yo", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"yo", - "task":"translation_from", - "metric":"bleu", - "score":0.0078376559 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"yo", - "task":"translation_from", - "metric":"chrf", - "score":0.1634867622 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"yo", - "task":"translation_to", - "metric":"bleu", - "score":0.028486223 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"yo", - "task":"translation_to", - "metric":"chrf", - "score":0.0875657048 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"yo", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"yue", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"yue", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"yue", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"yue", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"yue", - "task":"translation_from", - "metric":"bleu", - "score":0.1257691602 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"yue", - "task":"translation_from", - "metric":"chrf", - "score":0.3303444225 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"yue", - "task":"translation_to", - "metric":"bleu", - "score":0.159764099 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"yue", - "task":"translation_to", - "metric":"chrf", - "score":0.2311709663 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"zh", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"zh", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"zh", - "task":"mgsm", - "metric":"accuracy", - "score":0.4 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"zh", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"zh", - "task":"translation_from", - "metric":"bleu", - "score":0.170228681 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"zh", - "task":"translation_from", - "metric":"chrf", - "score":0.4530475535 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"zh", - "task":"translation_to", - "metric":"bleu", - "score":0.2387260041 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"zh", - "task":"translation_to", - "metric":"chrf", - "score":0.288127087 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"zu", - "task":"arc", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"zu", - "task":"classification", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"zu", - "task":"mgsm", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"zu", - "task":"mmlu", - "metric":"accuracy", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"zu", - "task":"translation_from", - "metric":"bleu", - "score":0.04772924 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"zu", - "task":"translation_from", - "metric":"chrf", - "score":0.237905051 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"zu", - "task":"translation_to", - "metric":"bleu", - "score":0.0 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"zu", - "task":"translation_to", - "metric":"chrf", - "score":0.1114053338 - }, - { - "model":"qwen\/qwen3-32b", - "bcp_47":"zu", - "task":"truthfulqa", - "metric":"accuracy", - "score":0.0 - } -] \ No newline at end of file +version https://git-lfs.github.com/spec/v1 +oid sha256:38416f0f457b715fb061e6c76493c203417fffba4cb0a33d5f669622c76956ef +size 57376543