diff --git "a/results.json" "b/results.json" --- "a/results.json" +++ "b/results.json" @@ -620,7 +620,7 @@ "family": "Indo-European", "flores_path": "nld_Latn", "fleurs_tag": "nl_nl", - "commonvoice_hours": 117.0, + "commonvoice_hours": 118.0, "commonvoice_locale": "nl", "in_benchmark": true }, @@ -1292,7 +1292,7 @@ "family": "Indo-European", "flores_path": "cat_Latn", "fleurs_tag": "ca_es", - "commonvoice_hours": 2852.0, + "commonvoice_hours": 2851.0, "commonvoice_locale": "ca", "in_benchmark": true }, @@ -3560,7 +3560,7 @@ "family": "Abkhaz-Adyge", "flores_path": null, "fleurs_tag": null, - "commonvoice_hours": 37.0, + "commonvoice_hours": 38.0, "commonvoice_locale": "kbd", "in_benchmark": false }, @@ -4616,7 +4616,7 @@ "family": "Turkic", "flores_path": null, "fleurs_tag": null, - "commonvoice_hours": 13.0, + "commonvoice_hours": 14.0, "commonvoice_locale": "sah", "in_benchmark": false }, @@ -4976,7 +4976,7 @@ "family": "Indo-European", "flores_path": null, "fleurs_tag": null, - "commonvoice_hours": 0.5, + "commonvoice_hours": 0.6, "commonvoice_locale": "kvx", "in_benchmark": false }, @@ -5432,7 +5432,7 @@ "family": "Indo-European", "flores_path": null, "fleurs_tag": null, - "commonvoice_hours": 0.1, + "commonvoice_hours": 1.1, "commonvoice_locale": "gjk", "in_benchmark": false }, @@ -8429,3459 +8429,13011 @@ }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.15815751066481462, - "sentence_nr": 1 + "score": 0.4226799078177409, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.5152611872266766, - "sentence_nr": 1 + "score": 0.5651672709988255, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.07407154448063642, - "sentence_nr": 1 + "score": 0.32406433662077544, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.43145434527321425, - "sentence_nr": 1 + "score": 0.5243586266504104, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.12903696060775005, - "sentence_nr": 1 + "score": 0.34633672321253084, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.456225988032654, - "sentence_nr": 1 + "score": 0.5378805625051344, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.12369892692249995, - "sentence_nr": 1 + "score": 0.5077888484472814, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.44549610902403686, - "sentence_nr": 1 + "score": 0.6493197366069867, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.06647168102389285, - "sentence_nr": 1 + "score": 0.4318843329340524, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.34350832619898364, - "sentence_nr": 1 + "score": 0.6011096108554106, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.12560672881768975, - "sentence_nr": 1 + "score": 0.3582301850807646, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.4969560260291519, - "sentence_nr": 1 + "score": 0.5380305837807603, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.12422788549118892, - "sentence_nr": 1 + "score": 0.3732667150787326, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.40222210564426, - "sentence_nr": 1 + "score": 0.5674650482249737, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.09735981717515908, - "sentence_nr": 1 + "score": 0.4641883721676649, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.35288934658906385, - "sentence_nr": 1 + "score": 0.6403267149729506, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.08273178236238297, - "sentence_nr": 1 + "score": 0.300740577257699, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.36399666460809255, - "sentence_nr": 1 + "score": 0.5272774705181614, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.38870674200492367, - "sentence_nr": 2 + "score": 0.3576035471132581, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.6484380084879691, - "sentence_nr": 2 + "score": 0.5426399702952437, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.3961285597009415, - "sentence_nr": 2 + "score": 0.4422044705926463, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.6148751441350505, - "sentence_nr": 2 + "score": 0.6089032707320831, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.4923751299732868, - "sentence_nr": 2 + "score": 0.3099603853356145, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.6853756490381199, - "sentence_nr": 2 + "score": 0.5209233176748354, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.11133996756497437, - "sentence_nr": 2 + "score": 0.33210944907163426, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.4410280353998367, - "sentence_nr": 2 + "score": 0.5289420578289948, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.17374951565433233, - "sentence_nr": 2 + "score": 0.4331131003868224, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.45325597884524305, - "sentence_nr": 2 + "score": 0.5898969623074624, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.17743299460161885, - "sentence_nr": 2 + "score": 0.35580399268816465, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.43071271897416463, - "sentence_nr": 2 + "score": 0.5392592206305507, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.26459538953931094, - "sentence_nr": 2 + "score": 0.4475435253337274, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.5272178908335121, - "sentence_nr": 2 + "score": 0.5956867226653717, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.26801022984888695, - "sentence_nr": 2 + "score": 0.5274220384037692, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.5654883864995515, - "sentence_nr": 2 + "score": 0.6765588140322357, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.21665407194210906, - "sentence_nr": 2 + "score": 0.39317381456022266, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.4344921442639243, - "sentence_nr": 2 + "score": 0.6026058740561834, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.38249626297768063, - "sentence_nr": 3 + "score": 0.4166560818400039, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.40976234193505356, - "sentence_nr": 3 + "score": 0.6515522498665886, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.7281051247089317, - "sentence_nr": 3 + "score": 0.42734667499155, + "sentence_nr": 0 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.7882997401328445, - "sentence_nr": 3 + "score": 0.6397906518456509, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.5806197937310393, - "sentence_nr": 3 + "score": 0.48930936408255293, + "sentence_nr": 0 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.7346706700987636, - "sentence_nr": 3 + "score": 0.699085629239476, + "sentence_nr": 0 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.08635800047213174, - "sentence_nr": 3 + "score": 0.15815751066481462, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.218109371254876, - "sentence_nr": 3 + "score": 0.5152611872266766, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.25552199116069907, - "sentence_nr": 3 + "score": 0.07407154448063642, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.3799133205289109, - "sentence_nr": 3 + "score": 0.43145434527321425, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.23386786214190372, - "sentence_nr": 3 + "score": 0.12903696060775005, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.3682311523733465, - "sentence_nr": 3 + "score": 0.456225988032654, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.5642761727828352, - "sentence_nr": 3 - }, + "score": 0.12369892692249995, + "sentence_nr": 1 + }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.6181373706707737, - "sentence_nr": 3 + "score": 0.44549610902403686, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.4093301993048525, - "sentence_nr": 3 + "score": 0.06647168102389285, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.512762518189388, - "sentence_nr": 3 + "score": 0.34350832619898364, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.6244631487487835, - "sentence_nr": 3 + "score": 0.12560672881768975, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.6931369519059803, - "sentence_nr": 3 + "score": 0.4969560260291519, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.8003203203844999, - "sentence_nr": 4 + "score": 0.12422788549118892, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.9453478043428296, - "sentence_nr": 4 + "score": 0.40222210564426, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.09735981717515908, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.35288934658906385, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 + "score": 0.08273178236238297, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 + "score": 0.36399666460809255, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.13714845589364738, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.45499281593451946, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.11564012893219777, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.44599783682350064, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.12601482779921785, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 + "score": 0.43595665254608706, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.3026566818840519, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.49546288984677567, - "sentence_nr": 4 + "score": 0.5945859352092411, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 4 + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.43795381992037963, - "sentence_nr": 4 + "score": 0.2521233582161207, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 + "score": 0.40959087443621306, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.5881561248602009, - "sentence_nr": 4 + "score": 0.6348509381122925, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.07793031063789554, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.3700181221537743, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.2887138086538547, - "sentence_nr": 5 + "score": 0.0867932999243575, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.6342291345998248, - "sentence_nr": 5 + "score": 0.4201964133235075, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.08214106568089705, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.3969463877642616, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 + "score": 0.41649654108052436, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.5087473540251254, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.7647955332172516, - "sentence_nr": 5 + "score": 0.3630576975795868, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.5087473540251254, - "sentence_nr": 5 + "score": 0.0744904632040495, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.7647955332172516, - "sentence_nr": 5 + "score": 0.4111163205685468, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 + "score": 0.08767210132815903, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.2828367156737383, - "sentence_nr": 5 + "score": 0.40476518002703893, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.48181149445310956, - "sentence_nr": 5 + "score": 0.08616711094288851, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7675828789334244, - "sentence_nr": 5 + "score": 0.3696512763473903, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", - "score": 0.5091224918749461, - "sentence_nr": 5 + "score": 0.12894104034845807, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.7829685247145245, - "sentence_nr": 5 + "score": 0.4486368934849452, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.14738500064905094, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.4659728395318289, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.15386029327005746, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.43911482594829104, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.10070927557742705, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.43718220262892105, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_from", - "metric": "chrf", - "score": 0.5838790966762375, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_from", - "metric": "bleu", - "score": 0.3390387389794623, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_from", - "metric": "chrf", - "score": 0.6170420596680538, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_from", - "metric": "bleu", - "score": 0.3142665434344143, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_from", - "metric": "chrf", - "score": 0.6466526067220029, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_from", - "metric": "bleu", - "score": 0.4216890913810254, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.6885217194158456, - "sentence_nr": 6 + "score": 0.3370100422576744, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.5014756677893482, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 1 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.7958858211784339, - "sentence_nr": 6 + "score": 0.1946966569103724, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.6255340042200862, - "sentence_nr": 6 + "score": 0.0772718393063023, + "sentence_nr": 1 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.8724783049357475, - "sentence_nr": 6 + "score": 0.4203683137304257, + "sentence_nr": 1 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.1582866049832572, - "sentence_nr": 7 + "score": 0.38870674200492367, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.34487142413575794, - "sentence_nr": 7 + "score": 0.6484380084879691, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.17905278399134197, - "sentence_nr": 7 + "score": 0.3961285597009415, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.37257295447029826, - "sentence_nr": 7 + "score": 0.6148751441350505, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "bleu", - "score": 0.15521606028436608, - "sentence_nr": 7 + "score": 0.4923751299732868, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "translation_from", "metric": "chrf", - "score": 0.37645329404497957, - "sentence_nr": 7 + "score": 0.6853756490381199, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.08860973467526746, - "sentence_nr": 7 + "score": 0.11133996756497437, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.3178004360288637, - "sentence_nr": 7 + "score": 0.4410280353998367, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.1418524086391329, - "sentence_nr": 7 + "score": 0.17374951565433233, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.38295770773758747, - "sentence_nr": 7 + "score": 0.45325597884524305, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "bleu", - "score": 0.15268019045355535, - "sentence_nr": 7 + "score": 0.17743299460161885, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "translation_from", "metric": "chrf", - "score": 0.41028757620299977, - "sentence_nr": 7 + "score": 0.43071271897416463, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.19074380068002203, - "sentence_nr": 7 + "score": 0.26459538953931094, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.40566585096277824, - "sentence_nr": 7 + "score": 0.5272178908335121, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.17382347640129553, - "sentence_nr": 7 + "score": 0.26801022984888695, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.4061580777885601, - "sentence_nr": 7 + "score": 0.5654883864995515, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "bleu", - "score": 0.13868172938464635, - "sentence_nr": 7 + "score": 0.21665407194210906, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", "task": "translation_from", "metric": "chrf", - "score": 0.3094469764260441, - "sentence_nr": 7 + "score": 0.4344921442639243, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.13410301071131794, - "sentence_nr": 8 + "score": 0.3563758622144919, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.3942932268034351, - "sentence_nr": 8 + "score": 0.6037023613177924, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.598931508663349, - "sentence_nr": 8 + "score": 0.3574583793293068, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.7353063745802827, - "sentence_nr": 8 + "score": 0.5924115119819969, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "es", "task": "translation_from", "metric": "bleu", - "score": 0.30677064886592076, - "sentence_nr": 8 + "score": 0.37994652561206577, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "es", "task": "translation_from", "metric": "chrf", - "score": 0.5308555945242818, - "sentence_nr": 8 + "score": 0.6464467277069994, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.17328174803055044, - "sentence_nr": 8 + "score": 0.2158914621804855, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.3178268797869574, - "sentence_nr": 8 + "score": 0.5448184155666022, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.4262221594184117, - "sentence_nr": 8 + "score": 0.22292726306270316, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.5886657414856064, - "sentence_nr": 8 + "score": 0.5653789747970112, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "ar", "task": "translation_from", "metric": "bleu", - "score": 0.286608441075188, - "sentence_nr": 8 + "score": 0.09362261118571368, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "ar", "task": "translation_from", "metric": "chrf", - "score": 0.4579283646292802, - "sentence_nr": 8 + "score": 0.3452056942265759, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.3315037521841549, - "sentence_nr": 8 + "score": 0.18031307339768174, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.468197879470805, - "sentence_nr": 8 + "score": 0.522164454804456, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.43186481103649477, - "sentence_nr": 8 + "score": 0.21403222128228389, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.5792139686527714, - "sentence_nr": 8 + "score": 0.563121432204311, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "ur", "task": "translation_from", "metric": "bleu", - "score": 0.30890092021323623, - "sentence_nr": 8 + "score": 0.18917620656425485, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "ur", "task": "translation_from", "metric": "chrf", - "score": 0.5553909583113487, - "sentence_nr": 8 + "score": 0.4346170232980484, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.20972571494011877, - "sentence_nr": 9 + "score": 0.2999092588227898, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.395894071208527, - "sentence_nr": 9 + "score": 0.5505916495384416, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "fr", "task": "translation_from", "metric": "bleu", - "score": 0.18559542135951204, - "sentence_nr": 9 + "score": 0.4054983797456263, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "fr", "task": "translation_from", "metric": "chrf", - "score": 0.3804842882867387, - "sentence_nr": 9 + "score": 0.6264774230839022, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.420450507904553, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.6503146347305717, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", "task": "translation_from", "metric": "bleu", "score": 0.0, - "sentence_nr": 9 + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.4263684749347053, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.20051119758906127, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.5334791309401924, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.24894072982768842, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", "task": "translation_from", "metric": "chrf", - "score": 0.35369375385786006, - "sentence_nr": 9 + "score": 0.5212235893093335, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.12475846123062707, - "sentence_nr": 9 + "score": 0.2562849004088193, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.27823340731817514, - "sentence_nr": 9 + "score": 0.5767019342009202, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.15122189206102096, - "sentence_nr": 9 + "score": 0.3535002370419364, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.26750110507308866, - "sentence_nr": 9 + "score": 0.5959879218348465, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "pt", "task": "translation_from", "metric": "bleu", - "score": 0.10759927692349745, - "sentence_nr": 9 + "score": 0.393613605227227, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "pt", "task": "translation_from", "metric": "chrf", - "score": 0.21065794536310511, - "sentence_nr": 9 + "score": 0.6492198447661237, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.30327872414714485, - "sentence_nr": 9 + "score": 0.2465888500427759, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.49804213541579834, - "sentence_nr": 9 + "score": 0.5221084445696768, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.21685485833927476, - "sentence_nr": 9 + "score": 0.35983766090218355, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.3714219747170047, - "sentence_nr": 9 + "score": 0.5862251404739759, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "pa", "task": "translation_from", "metric": "bleu", - "score": 0.12274092982883021, - "sentence_nr": 9 + "score": 0.21147734744561483, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "pa", "task": "translation_from", "metric": "chrf", - "score": 0.3385513651938691, - "sentence_nr": 9 + "score": 0.41020178654369294, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.5745954681260859, - "sentence_nr": 0 + "score": 0.38249626297768063, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7920051188244848, - "sentence_nr": 0 + "score": 0.40976234193505356, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.6358921902612438, - "sentence_nr": 0 + "score": 0.7281051247089317, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8041899227402122, - "sentence_nr": 0 + "score": 0.7882997401328445, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.6299285159340671, - "sentence_nr": 0 + "score": 0.5806197937310393, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.7993134129243716, - "sentence_nr": 0 + "score": 0.7346706700987636, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.3572514590810421, - "sentence_nr": 0 + "score": 0.08635800047213174, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.40312319760122833, - "sentence_nr": 0 + "score": 0.218109371254876, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.35059076445515835, - "sentence_nr": 0 + "score": 0.25552199116069907, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.40219803477483124, - "sentence_nr": 0 + "score": 0.3799133205289109, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.41316127706749806, - "sentence_nr": 0 + "score": 0.23386786214190372, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.4430321339435623, - "sentence_nr": 0 + "score": 0.3682311523733465, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.9878765474230741, - "sentence_nr": 0 + "score": 0.5642761727828352, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.9958930217841712, - "sentence_nr": 0 + "score": 0.6181373706707737, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.8780634320789833, - "sentence_nr": 0 + "score": 0.4093301993048525, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.926946700115022, - "sentence_nr": 0 + "score": 0.512762518189388, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.7964573357809173, - "sentence_nr": 0 + "score": 0.6244631487487835, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.8458636471716781, - "sentence_nr": 0 + "score": 0.6931369519059803, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.28822910320599077, - "sentence_nr": 1 + "score": 0.2615858282579583, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.6087031937056202, - "sentence_nr": 1 + "score": 0.35447530946908884, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.3880515884750121, - "sentence_nr": 1 + "score": 0.4577275269488853, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.6587916715823183, - "sentence_nr": 1 + "score": 0.6747054474171109, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.5142726846179982, - "sentence_nr": 1 + "score": 0.25383339228798274, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.7344716263345912, - "sentence_nr": 1 + "score": 0.45896379476820603, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.4094748015187699, - "sentence_nr": 1 + "score": 0.15138514598766048, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.4288513205758089, - "sentence_nr": 1 + "score": 0.3237497764315872, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.4487746167679644, - "sentence_nr": 1 + "score": 0.27668736912821895, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.4476730201191672, - "sentence_nr": 1 + "score": 0.4414406760568898, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.2836623400057614, - "sentence_nr": 1 + "score": 0.17200767571780612, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.29147337237183046, - "sentence_nr": 1 + "score": 0.3723150838362789, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.16950698451288215, - "sentence_nr": 1 + "score": 0.15604242268653643, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.48668984177868246, - "sentence_nr": 1 + "score": 0.2255928425212252, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.2113054108348111, - "sentence_nr": 1 + "score": 0.4751132438608344, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.44238229987470284, - "sentence_nr": 1 + "score": 0.6159319815107203, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.26207903587847736, - "sentence_nr": 1 + "score": 0.1477219991186121, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.50073123223194, - "sentence_nr": 1 + "score": 0.28685201698226354, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.6837528314895732, - "sentence_nr": 2 + "score": 0.24728515687112834, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.7968789890147058, - "sentence_nr": 2 + "score": 0.3088155734423375, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.7017829861193574, - "sentence_nr": 2 + "score": 0.7281051247089317, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.7743327021667388, - "sentence_nr": 2 + "score": 0.78479833664205, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.6961795371760597, - "sentence_nr": 2 + "score": 0.3254455687469726, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.7859480663394858, - "sentence_nr": 2 + "score": 0.4474512036484817, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.2360941227140328, - "sentence_nr": 2 + "score": 0.353203510510529, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.35939098278145853, - "sentence_nr": 2 + "score": 0.4910213297498164, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.14118350058219528, - "sentence_nr": 2 + "score": 0.4815092081725061, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.20431837779877604, - "sentence_nr": 2 + "score": 0.5820265218174012, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.1811004938014804, - "sentence_nr": 2 + "score": 0.23887527917609022, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.2649993136544717, - "sentence_nr": 2 + "score": 0.4120359948636439, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.23649053182388327, - "sentence_nr": 2 + "score": 0.23660362391696813, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.4127382174759535, - "sentence_nr": 2 + "score": 0.34152697838249696, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.20721924345714232, - "sentence_nr": 2 + "score": 0.580451128369423, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.36475932190367044, - "sentence_nr": 2 + "score": 0.7246473808162345, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.11386607947762988, - "sentence_nr": 2 + "score": 0.580451128369423, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.33564583347921473, - "sentence_nr": 2 + "score": 0.728208634600343, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.8107492451395732, + "score": 1.0, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.900032747778274, + "score": 1.0, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.8761560783209453, + "score": 0.8482942955247808, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.9484564543183253, + "score": 0.9256238040654331, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.7505336182671021, + "score": 1.0, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.8401910628269498, + "score": 1.0, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.13725861056573663, - "sentence_nr": 3 + "score": 0.8003203203844999, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.11147384852362276, - "sentence_nr": 3 + "score": 0.9453478043428296, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.13453927150397377, - "sentence_nr": 3 + "score": 1.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.10522974272748564, - "sentence_nr": 3 + "score": 1.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.22055493694673897, - "sentence_nr": 3 + "score": 1.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.3931965048763613, - "sentence_nr": 3 + "score": 1.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.839587623092576, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.9096086668952811, - "sentence_nr": 3 + "score": 0.629039349740581, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.6885326214539055, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.8229812189228393, - "sentence_nr": 3 + "score": 0.629039349740581, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.839587623092576, - "sentence_nr": 3 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.9096086668952811, - "sentence_nr": 3 + "score": 0.629039349740581, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.8003203203844999, + "score": 0.0, "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.9453478043428296, + "score": 0.49546288984677567, "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 1.0, + "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 1.0, + "score": 0.43795381992037963, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 1.0, + "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 1.0, + "score": 0.5881561248602009, "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.7215691881328408, + "score": 0.0, "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.735100789804592, + "score": 0.40854152133685306, "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.4765874091118851, + "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.45911557772276623, + "score": 0.41213231348812146, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.45022125383821326, + "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.46874267375238576, + "score": 0.40435987083533204, "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.2028736642487601, + "score": 0.0, "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.43458947791319813, + "score": 0.39618802899930716, "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.3013901676230198, + "score": 0.39618802899930716, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", "score": 0.0, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.38106012955734714, + "score": 0.39858613265631837, "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.5948724602646328, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.2887138086538547, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.6342291345998248, - "sentence_nr": 5 + "score": 0.5042211795038526, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 + "score": 0.4425973012069069, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.20748131961458333, - "sentence_nr": 5 + "score": 0.27447938256311044, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.2716205232346228, - "sentence_nr": 5 + "score": 0.615291848344044, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.37589902061551017, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.42554151277542873, - "sentence_nr": 5 + "score": 0.5516607622642397, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.20748131961458333, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.2716205232346228, - "sentence_nr": 5 + "score": 0.47160616105623426, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.4135171000263379, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.7050151549073953, - "sentence_nr": 5 + "score": 0.33762297226992255, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.42988105429544615, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.7577244658187771, - "sentence_nr": 5 + "score": 0.4576529535952892, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.5366411241731205, - "sentence_nr": 5 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.825566494253596, - "sentence_nr": 5 + "score": 0.5309982646782259, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.2658483576665877, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.6410540990527072, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 + "score": 0.24601372576927547, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 + "score": 0.6374693500772332, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 + "score": 0.1892240568795935, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "pt", + "task": "translation_from", "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 + "score": 0.6151179643430991, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.3450219162509876, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.3993348853061597, - "sentence_nr": 6 + "score": 0.6562641136790542, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.5561195823338172, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.5362935676066722, - "sentence_nr": 6 + "score": 0.46426595961938383, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "bleu", - "score": 0.5803515898273521, - "sentence_nr": 6 + "score": 0.0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "pa", + "task": "translation_from", "metric": "chrf", - "score": 0.5422220468910552, - "sentence_nr": 6 + "score": 0.41238100267720657, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.2562150245540302, - "sentence_nr": 6 + "score": 1.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.47046477830594896, - "sentence_nr": 6 + "score": 1.0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.33438299066966715, - "sentence_nr": 6 + "score": 0.2887138086538547, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.5409759573191787, - "sentence_nr": 6 + "score": 0.6342291345998248, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "bleu", - "score": 0.37854068916316835, - "sentence_nr": 6 + "score": 1.0, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "en", + "task": "translation_from", "metric": "chrf", - "score": 0.5743796566387722, - "sentence_nr": 6 + "score": 1.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.44711013370113256, - "sentence_nr": 7 + "score": 0.47410002229034043, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.7319347493436125, - "sentence_nr": 7 + "score": 0.7538467008030766, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.49023502313124495, - "sentence_nr": 7 + "score": 0.5087473540251254, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.7638414724136195, - "sentence_nr": 7 + "score": 0.7647955332172516, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "bleu", - "score": 0.4424906782646928, - "sentence_nr": 7 + "score": 0.5087473540251254, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "zh", + "task": "translation_from", "metric": "chrf", - "score": 0.705507971295129, - "sentence_nr": 7 + "score": 0.7647955332172516, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.21326369102393236, - "sentence_nr": 7 - }, + "score": 0.0, + "sentence_nr": 5 + }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.24781828193168487, - "sentence_nr": 7 + "score": 0.2828367156737383, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.07860105393900486, - "sentence_nr": 7 + "score": 0.48181149445310956, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.09678377693633947, - "sentence_nr": 7 + "score": 0.7675828789334244, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "bleu", - "score": 0.11601141307045003, - "sentence_nr": 7 + "score": 0.5091224918749461, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "hi", + "task": "translation_from", "metric": "chrf", - "score": 0.21671187566850864, - "sentence_nr": 7 + "score": 0.7829685247145245, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.2577716972449781, - "sentence_nr": 7 + "score": 0.44897710722021167, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.5171901208397282, - "sentence_nr": 7 + "score": 0.6862249089515978, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.35015224715252113, - "sentence_nr": 7 + "score": 0.404727200247809, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.5701648579139658, - "sentence_nr": 7 + "score": 0.6681898017773897, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "bleu", - "score": 0.3349252032650068, - "sentence_nr": 7 + "score": 0.40276720463657734, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "es", + "task": "translation_from", "metric": "chrf", - "score": 0.5908087431574293, - "sentence_nr": 7 + "score": 0.6529271690805427, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.22816849039973935, - "sentence_nr": 8 + "score": 0.2704091953828695, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.5295534280606148, - "sentence_nr": 8 + "score": 0.6207272323003366, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.348007986647201, - "sentence_nr": 8 + "score": 0.5379348324975908, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.6148736550683231, - "sentence_nr": 8 + "score": 0.7703766110349561, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "bleu", - "score": 0.31222258402876674, - "sentence_nr": 8 + "score": 0.30188353873287377, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "ar", + "task": "translation_from", "metric": "chrf", - "score": 0.5549937870516303, - "sentence_nr": 8 + "score": 0.6086565367747951, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.48181149445310956, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.10721126066665879, - "sentence_nr": 8 + "score": 0.7675828789334244, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.23683075175361493, - "sentence_nr": 8 + "score": 0.5091224918749461, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.2631328190836655, - "sentence_nr": 8 + "score": 0.7829685247145245, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 + "score": 0.6026286934891149, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation_to", + "bcp_47": "ur", + "task": "translation_from", "metric": "chrf", - "score": 0.16455392433653304, - "sentence_nr": 8 + "score": 0.8025775976044891, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.36033217429111203, - "sentence_nr": 8 + "score": 0.4596980088392874, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.5707860320039717, - "sentence_nr": 8 + "score": 0.713787745993602, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.461597801606675, - "sentence_nr": 8 + "score": 0.5896613549548209, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.6280777654467244, - "sentence_nr": 8 + "score": 0.7528914749586836, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "bleu", - "score": 0.4224298950114519, - "sentence_nr": 8 + "score": 0.4596980088392874, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation_to", + "bcp_47": "fr", + "task": "translation_from", "metric": "chrf", - "score": 0.60823085524287, - "sentence_nr": 8 + "score": 0.713787745993602, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.43103580001357805, - "sentence_nr": 9 + "score": 0.0, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.6690742226623104, - "sentence_nr": 9 + "score": 0.24706467963183681, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.6031612036218008, - "sentence_nr": 9 + "score": 0.4801289744823913, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.736286703381354, - "sentence_nr": 9 + "score": 0.6766690087429765, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "bleu", - "score": 0.39432344823662835, - "sentence_nr": 9 + "score": 0.3272712268138726, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation_to", + "bcp_47": "bn", + "task": "translation_from", "metric": "chrf", - "score": 0.5943452555220106, - "sentence_nr": 9 + "score": 0.6272846474183881, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.3665134361137304, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.6118771029352303, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.28489318277723963, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5764325110247531, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.2981792160679168, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5788026000794341, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.2799331151961311, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.6471892368478446, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.8142499721936278, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.7012294787544179, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.8478115719875968, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.10772332006118607, - "sentence_nr": 9 + "score": 0.19464521962073492, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.17652714369664665, - "sentence_nr": 9 + "score": 0.5838790966762375, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.11254397891886614, - "sentence_nr": 9 + "score": 0.3390387389794623, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.20623288988983426, - "sentence_nr": 9 + "score": 0.6170420596680538, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.1100081929352474, - "sentence_nr": 9 + "score": 0.3142665434344143, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.18967061672400035, - "sentence_nr": 9 + "score": 0.6466526067220029, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.27190910124573536, - "sentence_nr": 9 + "score": 0.4216890913810254, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.5173567851798608, - "sentence_nr": 9 + "score": 0.6885217194158456, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.24493390281390082, - "sentence_nr": 9 + "score": 0.5014756677893482, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.48113625107113883, - "sentence_nr": 9 + "score": 0.7958858211784339, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "bleu", - "score": 0.19476681308252697, - "sentence_nr": 9 + "score": 0.6255340042200862, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", - "task": "translation_to", + "task": "translation_from", "metric": "chrf", - "score": 0.42030407727741037, - "sentence_nr": 9 + "score": 0.8724783049357475, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.32365795029773287, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6509517796070665, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.7411155087367244, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.7411155087367244, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.5088645484558708, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.6991726442472661, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.4101479464529936, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.7041976254287654, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.4547900039222725, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.6541971428810075, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.7164026439677106, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.8020845125558708, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.595092211343687, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.7971172820981081, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.4831233610237384, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.7122562458056777, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.44787223195695314, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.7968980206907678, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.32263864160302524, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.6824395076981005, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.5383680940297331, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.786096406361039, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.38305978177479755, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.6061131723054572, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.1582866049832572, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.34487142413575794, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.17905278399134197, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.37257295447029826, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.15521606028436608, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.37645329404497957, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.08860973467526746, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.3178004360288637, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.1418524086391329, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.38295770773758747, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.15268019045355535, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.41028757620299977, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.19074380068002203, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.40566585096277824, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.17382347640129553, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.4061580777885601, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.13868172938464635, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3094469764260441, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.22381487678101888, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.5249370100068887, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.09431297723472011, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3616856339096348, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.11091252683001185, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.26607634610445896, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.10666682719585797, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.33462901494141756, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.14557808399334188, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.36598346755702993, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.18154954789336694, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.4557483776072868, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.20198948917565754, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.34858221035657466, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.16780109158842918, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3968694014697679, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.1381751568911733, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3121557499162649, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.22669486951066523, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4484451941575473, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.11697642623186386, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.37117753637984835, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.20065115069964384, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4084885616013531, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.09916009482330297, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3032928217006101, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.13805615693046389, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.40787998733941394, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.12291219097556666, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3448002180666873, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.09478705591775652, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.33293232395887284, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.19148282873929853, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.4707949702068854, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.20608572305725564, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.4704943905570542, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.14057105892389254, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3028381427383384, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.12157241570357182, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.4080990097991491, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.085416483900781, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.2825804066750608, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.13410301071131794, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.3942932268034351, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.598931508663349, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.7353063745802827, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.30677064886592076, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.5308555945242818, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.17328174803055044, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.3178268797869574, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.4262221594184117, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.5886657414856064, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.286608441075188, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.4579283646292802, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.3315037521841549, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.468197879470805, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.43186481103649477, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5792139686527714, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.30890092021323623, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.5553909583113487, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.22218130727359342, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.39929356245904674, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.46092611919700416, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.6365915338629015, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.1690979933029136, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.3751861276375209, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.26538706048179084, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.4982627378595717, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.5234484809182233, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.6658297773613274, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.2840563956846642, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.5110250591004448, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.15161074985415177, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3796830006266126, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.3815250264738168, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.6516314751979607, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.19920413481788912, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.42537796926163113, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.18679710353734788, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3876457319870774, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.40003810431098236, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.5899097408105687, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.20401796878756984, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.43317630453631556, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.21812881407613688, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3598346059855135, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.38047531731529327, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.49485723102957346, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.2044887070217883, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.38471585132587544, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.24586918158076287, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.4658595745396681, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.4489235959690452, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5934678825154104, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.2980504190448601, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.5101268920225042, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.46832763312452297, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.8176110134774669, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.9436043261706615, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.9880191679951993, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.20972571494011877, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.395894071208527, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.18559542135951204, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.3804842882867387, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_from", + "metric": "chrf", + "score": 0.35369375385786006, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.12475846123062707, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.27823340731817514, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.15122189206102096, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.26750110507308866, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "bleu", + "score": 0.10759927692349745, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_from", + "metric": "chrf", + "score": 0.21065794536310511, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.30327872414714485, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.49804213541579834, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.21685485833927476, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3714219747170047, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "bleu", + "score": 0.12274092982883021, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_from", + "metric": "chrf", + "score": 0.3385513651938691, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.12787395553510186, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.33752742535974617, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.112289032173749, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.17726100052085036, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_from", + "metric": "chrf", + "score": 0.31017716089889963, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.39962545473912425, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.3710595252626966, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "bleu", + "score": 0.21688283061839067, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_from", + "metric": "chrf", + "score": 0.41775824162589076, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.14326513489612383, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.4034278533385552, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.12666372160329223, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.2650373529479294, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "bleu", + "score": 0.12162779391619735, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_from", + "metric": "chrf", + "score": 0.3228288840559658, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.18294117097472648, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.4383387744769579, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.203264842568494, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.2922087191170089, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "bleu", + "score": 0.18237599479708327, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_from", + "metric": "chrf", + "score": 0.3740403511567824, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.20298407172594946, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.427376330935813, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.1740044679403827, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.36375152376157177, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "bleu", + "score": 0.12876689524369925, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_from", + "metric": "chrf", + "score": 0.3253153379449275, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.15626231814206226, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.2918712789926548, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.322788951728102, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.40263021320001785, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "bleu", + "score": 0.119159749312327, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_from", + "metric": "chrf", + "score": 0.21297942664093145, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.21397099133614067, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3568171392601981, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.16925466459550803, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.35912398848424326, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "bleu", + "score": 0.2036348471340078, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_from", + "metric": "chrf", + "score": 0.3472831655579266, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5745954681260859, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7920051188244848, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6358921902612438, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8041899227402122, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6299285159340671, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7993134129243716, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3572514590810421, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.40312319760122833, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.35059076445515835, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.40219803477483124, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.41316127706749806, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.4430321339435623, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.9878765474230741, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9958930217841712, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.8780634320789833, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.926946700115022, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.7964573357809173, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8458636471716781, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.35601247064914876, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6528728847159075, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.40673971192998765, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6897190926100627, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3707525915417785, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6481906761834414, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5724622291345857, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6818279156433621, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.703373719677874, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.7784050705257474, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.469958733898233, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5843756060033074, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2676232320051144, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5440246804235981, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3489926819498492, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5715668842319502, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2786169604662155, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5267252236203236, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3765213224289163, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6469521424555786, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3410244689880313, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5816669416914216, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4234343012313773, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6625289905598352, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.44219732271776674, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6193429426274062, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4324680011853555, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5877600878871951, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4493940083619696, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6230960824462234, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6638859619095425, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7874224590682172, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6947677373756656, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7941300666655116, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6412098671661826, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7665040244283648, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4845227999608418, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5968050469845498, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3861375213265022, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5122109329134508, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.32539921259497445, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5133457276293165, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.28822910320599077, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6087031937056202, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.3880515884750121, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6587916715823183, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.5142726846179982, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7344716263345912, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4094748015187699, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.4288513205758089, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4487746167679644, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.4476730201191672, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2836623400057614, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.29147337237183046, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.16950698451288215, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.48668984177868246, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2113054108348111, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.44238229987470284, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.26207903587847736, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.50073123223194, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4527112325797497, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6708989870027865, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4556160153884204, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6661994452325181, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3216756020053242, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6141241026166391, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3977038258772401, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6202897864314184, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.37570809340937233, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6339141734561076, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.559332422592187, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.733291190094771, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3167585643537871, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5076869840147092, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3446592076818278, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5819912583909785, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.23270938096152352, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4490269267329941, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.44114781827798216, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6241365710582877, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.529527758323629, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6540432510655854, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.49704232910799745, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6453248294274054, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3542266508664836, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5643413028542406, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3479698393875884, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5760833125751785, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.24373253714463095, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.49482039214573803, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3995439803178399, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6021193793256325, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2988697040013311, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5442522660489195, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.500703635659656, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6501904887399698, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2993081268625724, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.47777429598730525, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.43330223254789785, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5564499529933307, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1466607445607986, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.36552963821230766, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6837528314895732, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7968789890147058, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7017829861193574, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7743327021667388, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6961795371760597, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7859480663394858, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.2360941227140328, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.35939098278145853, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.14118350058219528, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.20431837779877604, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1811004938014804, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2649993136544717, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.23649053182388327, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.4127382174759535, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.20721924345714232, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.36475932190367044, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.11386607947762988, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.33564583347921473, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.570135897056151, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6801332690579707, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.46442643702863534, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5519480629125156, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.6268941789647348, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6958291103494518, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.21305368975019265, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4371748197696026, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.22837680015088951, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.44164180234500505, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4151474543103342, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.638952468710771, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.20876900081884944, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3981381071356935, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.11634129390828839, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.31530902302000635, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.19544795798162903, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3835451743665027, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.37917766663411384, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5365794450039074, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.359355103997122, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5589602235417395, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.4267520229161, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5518115366540288, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3221305290185444, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4866081657424789, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.35551034193127495, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5627284645723449, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.5039752490702457, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.613669501327356, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6260375038358343, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7803415401430737, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5088535943352446, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.625202596789752, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.562048819850726, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7192054483864224, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3146726146646545, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4709531555683, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3941975148525721, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5191046479503385, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.262633940062176, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.41923206553744197, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8107492451395732, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.900032747778274, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8761560783209453, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9484564543183253, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7505336182671021, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.8401910628269498, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.13725861056573663, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.11147384852362276, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.13453927150397377, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.10522974272748564, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.22055493694673897, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3931965048763613, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.839587623092576, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9096086668952811, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.6885326214539055, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.8229812189228393, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.839587623092576, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.9096086668952811, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.412295470431275, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.705800771033924, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3480442076026084, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6142483232997242, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.2861853478258715, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6401604432917332, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.14172292406325543, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4762857001428092, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.17401517708317762, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.45006261596496794, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.151240443751577, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4224869587588239, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.21850594525107195, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4049269026117245, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2028736642487601, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3614856639698008, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2255489037266197, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3954925749722234, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.2466674257522263, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.2929807168354841, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5975595069845072, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6689604664235209, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8010329764520807, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4678134833959513, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5051480556620123, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4113125177363443, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.42808075762838727, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.32685141385924577, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3758692873615971, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.27341185048222727, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6411651849711889, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.6237774736059616, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8500131524897436, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.573764722928549, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.817979859532479, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.8482942955247808, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9256238040654331, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.8003203203844999, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453478043428296, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.7215691881328408, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.735100789804592, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.4765874091118851, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.45911557772276623, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.45022125383821326, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.46874267375238576, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2028736642487601, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.43458947791319813, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.3013901676230198, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.38106012955734714, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3820562306791339, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5488313413379253, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3820562306791339, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5488313413379253, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.38091370416670794, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.540550443602966, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.23189835231884592, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.44157797833899437, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.2294068720558097, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.40458364050078693, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.32588643749980295, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.31430120091187586, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5013155459452984, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.45237912327122276, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.6299071573751139, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.23736810439041953, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4004852416401387, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3722001929300059, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5252698638532942, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5605065818946205, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.586853267829013, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.36300296341860155, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.5816676674074003, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1258907882951215, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.34143648068854054, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.28765408533715414, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4046608868073569, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.1819722649161304, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.44254730215235283, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.28295596283263513, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6067794553589253, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.13305199541830684, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.43244987270004115, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3377385620641691, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5256128450453542, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.1175771442804648, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3103572690939351, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12546912767038895, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2651343523961406, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.12407216162020399, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.2664864612493293, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.2887138086538547, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6342291345998248, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.20748131961458333, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2716205232346228, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.37589902061551017, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.42554151277542873, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.20748131961458333, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2716205232346228, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4135171000263379, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7050151549073953, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.42988105429544615, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.7577244658187771, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.5366411241731205, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.825566494253596, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.6976333495952621, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.8331572107884448, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4165530720734658, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7027805129995731, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.4027788021844849, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6872835607174038, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.498704623570665, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6478746389895599, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.41307323705325416, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5785653391533346, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.5248587176134882, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6664855309004869, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.35210829264331733, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5239651686730163, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.17729842264695017, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4103582047611184, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.34895836374229405, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4767378358574124, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.833078701050083, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9482515348146272, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8958039312312598, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9382091007325469, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.5805399561362194, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4810464260105228, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4500531895417844, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.43027065541050147, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.4933292241270431, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5225247297523148, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5064127215831256, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6516332048338376, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5212982931053122, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.35319015092357736, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5822934956325967, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4592978565863154, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.3855522725905196, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.587260566914102, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.4426623526629488, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6368371029698285, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.3450219162509876, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.3993348853061597, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.5561195823338172, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.5362935676066722, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.5803515898273521, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.5422220468910552, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2562150245540302, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.47046477830594896, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.33438299066966715, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5409759573191787, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.37854068916316835, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5743796566387722, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.5521710658453207, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7317828775912516, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.45506803308128024, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6477506541284608, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.592313615748771, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.7382416555842614, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4018202851356865, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6003256951549871, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.4018202851356865, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.6003256951549871, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.580451128369423, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.7542976177437886, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.18207052811092134, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.4504432021668592, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.37717457428685847, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5554130492458337, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.31598923484911084, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.6425503166524515, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8078891929749037, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.7629273292796576, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8510385544954956, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.8725129388059689, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.9495292423959529, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.2147607499133801, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.3976144917079093, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.49349163706233623, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.694445271037971, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3037643089519314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5183662698462751, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.8253498772794055, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8529564805429163, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.7944837206494969, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.8784531740275225, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.23887527917609022, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5924993690004501, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.5828833474188783, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.7908226509294533, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.38694317759010316, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.5953878513137957, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.44711013370113256, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7319347493436125, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.49023502313124495, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.7638414724136195, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.4424906782646928, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.705507971295129, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.21326369102393236, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.24781828193168487, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.07860105393900486, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.09678377693633947, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.11601141307045003, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.21671187566850864, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.2577716972449781, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5171901208397282, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.35015224715252113, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5701648579139658, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.3349252032650068, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5908087431574293, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3274016883618531, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5570399656004248, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3449058130015412, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5365619830343804, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.29688845677442144, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5494319015457763, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.207314191412716, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4360555836773355, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.25376032254696296, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5334329403985332, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.20039141607873007, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.36123312088832493, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3235473265529593, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5441122251341168, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.32707695373369694, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5166643606783462, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3069937936246452, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.5024648105961349, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.26513488970168847, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6029932145447834, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.3927237741677927, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.7451438087039315, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.5570357635362685, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.8116469942298856, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3860973950960897, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6271680934322363, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3826576187198625, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.6071841372061269, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3447241447679157, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5531085140985558, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.27710310401156996, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5216248191624099, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.31128635710849173, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6304411194127884, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.13308561809919006, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5312476702183977, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.20475739007221866, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3934874462686164, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.2879556779114461, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.4554184077174173, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.09578921953028982, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.40472887922389433, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.22816849039973935, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5295534280606148, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.348007986647201, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6148736550683231, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.31222258402876674, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5549937870516303, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.10721126066665879, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.23683075175361493, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.2631328190836655, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.16455392433653304, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.36033217429111203, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5707860320039717, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.461597801606675, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.6280777654467244, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.4224298950114519, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.60823085524287, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4141871474340027, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.36769040719718776, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.4064141882459388, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3096036988813059, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5894510883198948, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.3286711939680359, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.5944310794747374, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.13547277341758465, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.4830189619506113, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.23841754841770157, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.34481325534410395, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2822535302220024, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.3874773378787974, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.31747697264511426, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.40797778663955364, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.43368945552925614, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6793717376740783, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.46935933364934335, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.773055573548356, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.43368945552925614, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.640995178057518, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.20050320605789015, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4046291070099031, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.3160213610127146, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.5165614670038283, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.30758744700466467, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4684197705189288, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4161791450287817, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.7054426787013603, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.2961516536011624, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.49803924348035766, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.3459667618766101, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6170810606402402, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.46832763312452297, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.8176110134774669, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.9436043261706615, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.9880191679951993, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.43103580001357805, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.6690742226623104, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.6031612036218008, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.736286703381354, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "bleu", + "score": 0.39432344823662835, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation_to", + "metric": "chrf", + "score": 0.5943452555220106, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.10772332006118607, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.17652714369664665, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.11254397891886614, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.20623288988983426, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "bleu", + "score": 0.1100081929352474, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation_to", + "metric": "chrf", + "score": 0.18967061672400035, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.27190910124573536, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.5173567851798608, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.24493390281390082, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.48113625107113883, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "bleu", + "score": 0.19476681308252697, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation_to", + "metric": "chrf", + "score": 0.42030407727741037, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.429512074830509, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.6066779955199886, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.3459789902390003, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5620330456296532, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "bleu", + "score": 0.37825713491091884, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation_to", + "metric": "chrf", + "score": 0.5584414289480568, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.09596136927307748, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.40849147213099996, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.1845747513433909, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.44379971518505973, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "bleu", + "score": 0.18212463619188357, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation_to", + "metric": "chrf", + "score": 0.469592540371137, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.22972631482860506, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.436102988762466, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.2517176762753373, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.45137344500317134, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "bleu", + "score": 0.3128384316903283, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation_to", + "metric": "chrf", + "score": 0.48016279207050283, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.45026965676007474, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6668256174353906, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.43200638115383627, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6892273787708799, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "bleu", + "score": 0.341195158470265, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation_to", + "metric": "chrf", + "score": 0.6539473951166187, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4710260495003035, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.40475700826319555, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation_to", + "metric": "chrf", + "score": 0.4349871720911447, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.310186302993101, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.5434540129901786, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.5244380103905697, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6914581279144536, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "bleu", + "score": 0.4472834999328078, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation_to", + "metric": "chrf", + "score": 0.6457130269652316, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.17207258849758605, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.3052503498954155, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.504154287515855, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.6074467585243234, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "bleu", + "score": 0.18771816026273827, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation_to", + "metric": "chrf", + "score": 0.37594160796244835, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", "task": "classification", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "es", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 0 + "score": 1, + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 2 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 2 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 2 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 2 + "score": 0, + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, "sentence_nr": 3 }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "bn", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 3 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 3 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 3 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "en", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "zh", + "task": "mmlu", "metric": "accuracy", "score": 1, "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 5 + "score": 0, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 6 + "sentence_nr": 4 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 4 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 4 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 7 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", - "task": "classification", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "hi", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "es", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "ar", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", + "bcp_47": "fr", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", + "bcp_47": "bn", + "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 9 + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 5 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 9 + "sentence_nr": 5 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", + "bcp_47": "pt", + "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 9 + "score": 1, + "sentence_nr": 5 }, { "model": "openai/gpt-4o-mini", @@ -11889,7 +21441,7 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", @@ -11897,23 +21449,23 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", @@ -11921,15 +21473,15 @@ "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 0 + "score": 0, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", @@ -11937,7 +21489,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", @@ -11945,7 +21497,7 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -11953,359 +21505,383 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 0 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "fr", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 1 + "score": 1, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 1 + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 1 + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 6 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 6 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 6 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 2 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 2 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 2 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 3 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 3 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 3 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 3 + "score": 1, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 1, - "sentence_nr": 4 + "score": 0, + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 0, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 4 + "score": 1, + "sentence_nr": 7 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 4 + "sentence_nr": 7 }, { "model": "openai/gpt-4o-mini", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 5 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", @@ -12313,294 +21889,318 @@ "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 5 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 6 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 6 + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "pt", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 8 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "en", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 7 + "score": 1, + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "zh", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "hi", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", - "score": 0, - "sentence_nr": 8 + "score": 1, + "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "es", "task": "mmlu", "metric": "accuracy", "score": 1, - "sentence_nr": 8 + "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "en", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "en", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", + "bcp_47": "ar", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "zh", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "zh", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", + "bcp_47": "fr", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { "model": "openai/gpt-4o-mini", - "bcp_47": "hi", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { "model": "meta-llama/llama-4-maverick", - "bcp_47": "hi", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, "sentence_nr": 9 }, { "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", + "bcp_47": "bn", "task": "mmlu", "metric": "accuracy", - "score": 0, + "score": 1, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-4-maverick", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "mmlu", + "metric": "accuracy", + "score": 1, "sentence_nr": 9 } ]