diff --git "a/results.json" "b/results.json" --- "a/results.json" +++ "b/results.json" @@ -1,57002 +1,48002 @@ [ - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5679608237702286, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.746881923400435, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4438455475739657, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6320800718582147, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5894973558751632, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7562097956860054, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3846086976522069, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5835344719191324, - "sentence_nr": 0 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4804215535486392, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6694735319785804, - "sentence_nr": 0 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.2511517944602615, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4484633445384819, - "sentence_nr": 0 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5820808184424484, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.73788733854976, - "sentence_nr": 0 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5749603738163459, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7240488251574404, - "sentence_nr": 0 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5617561349997696, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7132694856647042, - "sentence_nr": 0 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.2963216580569375, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5101500486835966, - "sentence_nr": 0 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.15317719477157257, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.38800976493585004, - "sentence_nr": 0 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6001453932849357, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.762029391170019, - "sentence_nr": 0 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.30676942927198475, - "sentence_nr": 0 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4968492831219663, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.32063971770635635, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5206258401513325, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.39086127104761287, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6239956806265569, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3020679767949182, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5246291817407542, - "sentence_nr": 0 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.29261990846502584, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5207965578474395, - "sentence_nr": 0 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.23343658187420896, - "sentence_nr": 0 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5188968707275573, - "sentence_nr": 0 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.2920008662633279, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.47119207959541226, - "sentence_nr": 0 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.2596939072050362, - "sentence_nr": 0 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4394574387008692, - "sentence_nr": 0 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4273817965049865, - "sentence_nr": 0 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6016204186733703, - "sentence_nr": 0 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 0 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 0 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.2777551012631926, - "sentence_nr": 0 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.49423240120783246, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.7964573357809173, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.8458636471716781, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.34633672321253084, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.5378805625051344, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.3582301850807646, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.5380305837807603, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.300740577257699, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.5272774705181614, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.3099603853356145, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.5209233176748354, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.35580399268816465, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.5392592206305507, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.39317381456022266, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.6026058740561834, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.48930936408255293, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.699085629239476, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.3963410285961713, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.613166190285915, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.44294247711132617, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.5915660675216782, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.3756985486608933, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.5991443770283833, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.5009456904181451, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.6893719644090858, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.18273944860385094, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.44261865187418153, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.2153742037697241, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.4581737688885401, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.3372953649368346, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.5482505380106469, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.28528905353056333, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.4885812318466243, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.2935204022158406, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.4867597973247361, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.2929684584911775, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.5038324436049059, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.4034224234291925, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.5736798834726872, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.1077205146963877, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.428338145564396, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.22327767951697297, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.4063556880747369, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.2572733200413211, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.4520014138562526, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.40311197004738203, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.5788525108956781, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.45313578977486535, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.6160993561903745, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.2651736858432996, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.4491383344282561, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.34545319957597864, - "sentence_nr": 0 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.5727052860304503, - "sentence_nr": 0 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.15815751066481462, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5152611872266766, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.12903696060775005, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.456225988032654, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.024459391267874976, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.12351824822447692, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.46822754470803873, - "sentence_nr": 1 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3465147345201782, - "sentence_nr": 1 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.08516700886866406, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4091252890943268, - "sentence_nr": 1 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.19194937906573872, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5477665664300843, - "sentence_nr": 1 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4370196290761142, - "sentence_nr": 1 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.20669086265781264, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5076721272198604, - "sentence_nr": 1 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.17630490037560695, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.48116430160978857, - "sentence_nr": 1 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4122750002638689, - "sentence_nr": 1 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.15412719160788987, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5010353699512481, - "sentence_nr": 1 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.12369892692249995, - "sentence_nr": 1 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.44549610902403686, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.12560672881768975, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4969560260291519, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.17077058518804336, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5022008374701596, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.10784756064735967, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4427230465401631, - "sentence_nr": 1 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.06735571462439276, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.38102852892512806, - "sentence_nr": 1 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.42723260976616784, - "sentence_nr": 1 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.1694466724647263, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4902502031746037, - "sentence_nr": 1 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3532931581623198, - "sentence_nr": 1 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.175396614619324, - "sentence_nr": 1 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.49736499605529066, - "sentence_nr": 1 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.15154395847232716, - "sentence_nr": 1 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.46053919348995803, - "sentence_nr": 1 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4041678259311437, - "sentence_nr": 1 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.1290514243115152, - "sentence_nr": 1 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4766581477336301, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.08273178236238297, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.36399666460809255, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.12601482779921785, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.43595665254608706, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.40959087443621306, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.6348509381122925, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.08214106568089705, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.3969463877642616, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.0744904632040495, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.4111163205685468, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.12894104034845807, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.4486368934849452, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.10070927557742705, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.43718220262892105, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.0772718393063023, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.4203683137304257, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.0756907193511249, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.4138725093679467, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.21748353646757182, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.4462746462826943, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.4179644538349004, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.10505106462290037, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.4474870048911137, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.0009218289085545725, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.15653859793617866, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.43177798053127925, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0891537192318598, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.3970634926176537, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0950136506275681, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.4372017487229785, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.1259356760989446, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.44568274520971096, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.16322494183480127, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.4815584993817062, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.0904087252785689, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.41830513174690515, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.21351902664706998, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.5130443042033361, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.16269986423611488, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.06939838145153245, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.3371547585108182, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.1691386174483793, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.4920789340026317, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.14944432524273302, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.4972796478830659, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.09793316925795417, - "sentence_nr": 1 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.4297577431879659, - "sentence_nr": 1 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.38870674200492367, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6484380084879691, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4923751299732868, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6853756490381199, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3996712647649035, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6353525755760105, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5115346945020283, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7037574715738644, - "sentence_nr": 2 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.017834618169115152, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.05927156798818119, - "sentence_nr": 2 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.23904922011090457, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3399292774084129, - "sentence_nr": 2 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6152980280400979, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8311281590297233, - "sentence_nr": 2 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.005449161724399305, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.026158029267484995, - "sentence_nr": 2 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.24508104771894088, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5725552336126134, - "sentence_nr": 2 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.20801258614305904, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.26703508536995574, - "sentence_nr": 2 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.35315040956049437, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.625895188503691, - "sentence_nr": 2 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.11133996756497437, - "sentence_nr": 2 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4410280353998367, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.17743299460161885, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.43071271897416463, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.16052654068024738, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.41580120868053494, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.05963579607071745, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.31139762378406344, - "sentence_nr": 2 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.006734847287559362, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.03408121951468736, - "sentence_nr": 2 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.09880177230676102, - "sentence_nr": 2 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3297638349619511, - "sentence_nr": 2 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.2377604053257556, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5662768009060447, - "sentence_nr": 2 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.22573408807826306, - "sentence_nr": 2 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5444672928195973, - "sentence_nr": 2 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.10742716472890976, - "sentence_nr": 2 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.42694859148910824, - "sentence_nr": 2 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 2 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 2 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.14745870033404418, - "sentence_nr": 2 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.475170637938921, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.21665407194210906, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.4344921442639243, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.37994652561206577, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6464467277069994, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.09362261118571368, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.3452056942265759, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.18917620656425485, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.4346170232980484, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.420450507904553, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.6503146347305717, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.24894072982768842, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.5212235893093335, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.393613605227227, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.6492198447661237, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.21147734744561483, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.41020178654369294, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.2329856851831642, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.5405751250637106, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.41756686236967944, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.5616829345739638, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.38189567401226293, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.6154314825900052, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.2126707920684064, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.4659908460634765, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.23240102389974368, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.4973274282641141, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.17979384730979156, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.4177311931467539, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.1702602472176709, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.4366640707779677, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.933651069586263, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.9586507529693243, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.3816408219023713, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.5784105768028126, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.18398226639192106, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.37285010531146734, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.26958884543190903, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.5631664732610485, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.4005296397635166, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.6201785376974677, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.15956483578595942, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.425693420655628, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.2323385180696658, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.5019509292309764, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.22952177306405494, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.5279520952576137, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.3618488169166299, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.5708179622131996, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.1712766252338756, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.5225554962608486, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.2709079038456153, - "sentence_nr": 2 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.447458019441992, - "sentence_nr": 2 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.38249626297768063, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.40976234193505356, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5806197937310393, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7346706700987636, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5793367580502561, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6502428441722727, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4855332614117322, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5299556742893647, - "sentence_nr": 3 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.19940445989088915, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.43164821827950184, - "sentence_nr": 3 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.2423441824135159, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4429509373913047, - "sentence_nr": 3 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6242817472465665, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7056438934239434, - "sentence_nr": 3 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6064630666233242, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6752055521830945, - "sentence_nr": 3 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5357110024227318, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6365941772753647, - "sentence_nr": 3 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.14790264259417688, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.27159767590045303, - "sentence_nr": 3 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4751132438608344, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6849386986272349, - "sentence_nr": 3 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.08635800047213174, - "sentence_nr": 3 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.218109371254876, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.23386786214190372, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3682311523733465, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.11739521786077453, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.22090491782919655, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.1892240568795935, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.280413108453108, - "sentence_nr": 3 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.11547518641061649, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.25945846414490087, - "sentence_nr": 3 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.20233074088759792, - "sentence_nr": 3 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3746629492952356, - "sentence_nr": 3 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.40214612768560637, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.45128424593135114, - "sentence_nr": 3 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.37284875432797243, - "sentence_nr": 3 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.44888401040760956, - "sentence_nr": 3 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0925329498915617, - "sentence_nr": 3 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.2110486160692096, - "sentence_nr": 3 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 3 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 3 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.12453389344594705, - "sentence_nr": 3 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.141543757252386, - "sentence_nr": 3 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.2594145364221844, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.6244631487487835, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.6931369519059803, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.25383339228798274, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.45896379476820603, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.17200767571780612, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.3723150838362789, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.1477219991186121, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.28685201698226354, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.3254455687469726, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.4474512036484817, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.23887527917609022, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.4120359948636439, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.580451128369423, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.728208634600343, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.3556521383601747, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.594830811413066, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.21629114799587432, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.3542320138389837, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.580451128369423, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.728208634600343, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.27405612859390877, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.4639958592456083, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.13004800471424346, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.28217142159025543, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.37821486365532614, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.4718665834023439, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.3699382260470039, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.4032851361478274, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.45167594566243024, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.5169677927619225, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.23386786214190372, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.3780009826926042, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.3925121365052661, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.47788592802001717, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.1423412184218882, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.2596718628394258, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.3572188192648703, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.45381175288762937, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.07425055521504613, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.18122341046764998, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.1978585723043446, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.3527599187160617, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.2523019529343173, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.4406369072888057, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.41072675483179805, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.5635589150380774, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.3883375900135818, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.4643731845106876, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.6242817472465665, - "sentence_nr": 3 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.7123666275414222, - "sentence_nr": 3 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.8003203203844999, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9453478043428296, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2246029757863831, - "sentence_nr": 4 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5773502691896258, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7999099314029202, - "sentence_nr": 4 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6417603075499863, - "sentence_nr": 4 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7825422900366437, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8503171627677965, - "sentence_nr": 4 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.37709297891717664, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6881502501430368, - "sentence_nr": 4 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.8003203203844999, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9453478043428296, - "sentence_nr": 4 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5581982021478125, - "sentence_nr": 4 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.652013511062815, - "sentence_nr": 4 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.629039349740581, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.5881561248602009, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.40435987083533204, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.39858613265631837, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.4425973012069069, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.47160616105623426, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.5309982646782259, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.1892240568795935, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.6151179643430991, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.41238100267720657, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.39909989628767284, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.17181529671327242, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.5293474685884572, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.4429196299668147, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.3830425592586042, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.1667955161379731, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.5802683403568892, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.3237722713145643, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.7426638026175545, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.49342175914364256, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.4352628824108997, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.5116862201536014, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.33471616336068044, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.2865612242047131, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.6433813179203622, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.3598792258309727, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.5125809225356253, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.5539920925426138, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.5226572946586268, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.5073395824633415, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.29382595610734974, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.1667955161379731, - "sentence_nr": 4 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.5773664661124461, - "sentence_nr": 4 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7013062757071812, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9303769449292738, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2381658499765768, - "sentence_nr": 5 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9063898435384111, - "sentence_nr": 5 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5309354663044072, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6756014232714684, - "sentence_nr": 5 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4529852871970908, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6941474239078328, - "sentence_nr": 5 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.8522456714074852, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9096914044088521, - "sentence_nr": 5 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.9457416090031758, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9892952933418456, - "sentence_nr": 5 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5087473540251254, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7647955332172516, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5087473540251254, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7647955332172516, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4234885228074744, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7410180114887145, - "sentence_nr": 5 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.47375069012411286, - "sentence_nr": 5 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7543919667018285, - "sentence_nr": 5 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5738396574789242, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.798357133373606, - "sentence_nr": 5 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5738396574789242, - "sentence_nr": 5 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.798357133373606, - "sentence_nr": 5 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.47410002229034043, - "sentence_nr": 5 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7538467008030766, - "sentence_nr": 5 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4234885228074744, - "sentence_nr": 5 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7410180114887145, - "sentence_nr": 5 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.47375069012411286, - "sentence_nr": 5 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7543919667018285, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.5091224918749461, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.7829685247145245, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.40276720463657734, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.6529271690805427, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.30188353873287377, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.6086565367747951, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.6026286934891149, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.8025775976044891, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.4596980088392874, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.713787745993602, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.3272712268138726, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.6272846474183881, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.2981792160679168, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.5788026000794341, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.7012294787544179, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.8478115719875968, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.21690365808279138, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.5384773678665918, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.25711386542134795, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.6088853751738869, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.5695988432761473, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.7516103467926585, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.5072784644062104, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.7361065921505279, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.38091370416670794, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.6438225861756911, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.5091224918749461, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.7202697992734389, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.6917901740466924, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.8479928839177578, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.5309354663044072, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.6990707992725005, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.1673872929477023, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.4506667273103674, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.6917901740466924, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.8479928839177578, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.3416581331218724, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.6578570934289981, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.4797543511401896, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.7240781310560407, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.6401876410870359, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.7526484951226097, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.33057129676705455, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.5669225664686625, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.6004981752197522, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.7697646564917222, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.8492326635760689, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.9027320255916917, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.30350690419450826, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.569133886912883, - "sentence_nr": 5 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.6834516951654327, - "sentence_nr": 5 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3263040636562357, - "sentence_nr": 6 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5838790966762375, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3142665434344143, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6466526067220029, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3751840463233443, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6279894552667558, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.19268479640608693, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.551397074868541, - "sentence_nr": 6 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.17470942957770763, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5403400891349619, - "sentence_nr": 6 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5763410052067085, - "sentence_nr": 6 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.37392149096896676, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6665214662145853, - "sentence_nr": 6 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5838790966762375, - "sentence_nr": 6 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.19464521962073492, - "sentence_nr": 6 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5763410052067085, - "sentence_nr": 6 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5460240376042262, - "sentence_nr": 6 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.24343304284910333, - "sentence_nr": 6 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6275577931282961, - "sentence_nr": 6 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.28571962561926445, - "sentence_nr": 6 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6431872581462166, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.6255340042200862, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.8724783049357475, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.48994561421713123, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.7411155087367244, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.4547900039222725, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.6541971428810075, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.23198210427894825, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.630711601223299, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.4831233610237384, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.7122562458056777, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.32263864160302524, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.6824395076981005, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.38305978177479755, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.6061131723054572, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.24047860794644352, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.58198979036704, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.7511573912724299, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.9453473543978153, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.42984824697674956, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.7289444696770301, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.4881010344921759, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.7317734491561229, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.5971070986250356, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.8874294965619517, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.3684981984538114, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.5606332518476288, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.4536404448264584, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.8020827133708689, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.4545091839935173, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.7166050399790445, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.3370129264673147, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.7096874943799061, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.1624355752882384, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.4952968469712617, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.4831233610237384, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.7807505267551733, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.5595205105615875, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.8322210048001876, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.3142665434344143, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.6466526067220029, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.3610544299180199, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.49125115898082056, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.4284945090100314, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.7164026439677106, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.8578928092681435, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.9422733087334002, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.42818224355402373, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.42105372680687736, - "sentence_nr": 6 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.7001171094008295, - "sentence_nr": 6 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1582866049832572, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.34487142413575794, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.15521606028436608, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.37645329404497957, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.12620429887108936, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.35580703793872603, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.12872220631084524, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.33602633953270183, - "sentence_nr": 7 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.042121062429802174, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.14281404499176092, - "sentence_nr": 7 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.042575418285137674, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.05173688961049459, - "sentence_nr": 7 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3045613775157565, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5275070803493389, - "sentence_nr": 7 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.2734283774929853, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5252214120598302, - "sentence_nr": 7 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.10203846572325131, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.33381153680096753, - "sentence_nr": 7 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.014935758919429663, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.08106107745254391, - "sentence_nr": 7 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.044304867337633724, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.20806974344498103, - "sentence_nr": 7 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.08860973467526746, - "sentence_nr": 7 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3178004360288637, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.15268019045355535, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.41028757620299977, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.030860166165309233, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.1100250143829584, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.21255327712152144, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.43272151570555034, - "sentence_nr": 7 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.01486609147288197, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.13893773605583024, - "sentence_nr": 7 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.06609667473412645, - "sentence_nr": 7 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.26197209338359717, - "sentence_nr": 7 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.26064517697298795, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5092206110218525, - "sentence_nr": 7 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.1507980395794452, - "sentence_nr": 7 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4306039128585424, - "sentence_nr": 7 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.1438459189500836, - "sentence_nr": 7 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.30693371625402605, - "sentence_nr": 7 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0979038733644086, - "sentence_nr": 7 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.30211704738953993, - "sentence_nr": 7 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.009624974244068071, - "sentence_nr": 7 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.07318255686027669, - "sentence_nr": 7 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.043420474648595074, - "sentence_nr": 7 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.2884095690753619, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.13868172938464635, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.3094469764260441, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.11091252683001185, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.26607634610445896, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.18154954789336694, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.4557483776072868, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.1381751568911733, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.3121557499162649, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.20065115069964384, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.4084885616013531, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.12291219097556666, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.3448002180666873, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.20608572305725564, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.4704943905570542, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.085416483900781, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.2825804066750608, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.11452508920842025, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.3212742401272785, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.15478222669012726, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.3550584759508654, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.07875433150726119, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.2638954513805452, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.10734088848154077, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.33946796348247366, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.17795920517030017, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.41862955401967455, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.19388048412249795, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.44361702376789247, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.1237012344369667, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.33331866832253354, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.15589802574348086, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.37894206802233305, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.1948502778967486, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.35525815981538433, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.1618333627385132, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.3458746996740858, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.17393111207515277, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.39042812195808824, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.19064689695123957, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.36954921822756504, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.1785851272602057, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.3800733399524004, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.20113943179758872, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.5054929215592371, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.07088281524771703, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.1725752257112697, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.11901413329120636, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.2908877283991857, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.15593857496482408, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.3832822126692406, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.21107720643690867, - "sentence_nr": 7 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.43911506176829573, - "sentence_nr": 7 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.13410301071131794, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3942932268034351, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.30677064886592076, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5308555945242818, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1327526847508867, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.37850602486495205, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.18405035438430847, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4142901090120915, - "sentence_nr": 8 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.061826017721563604, - "sentence_nr": 8 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.08852681798207009, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3583179111355935, - "sentence_nr": 8 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3857436691295343, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5750224388123065, - "sentence_nr": 8 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5920893212447781, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6925021521158101, - "sentence_nr": 8 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.22478613858269392, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.44348101018104913, - "sentence_nr": 8 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.183687049781416, - "sentence_nr": 8 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.351911486970854, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5181825846579515, - "sentence_nr": 8 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.17328174803055044, - "sentence_nr": 8 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3178268797869574, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.286608441075188, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4579283646292802, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.25861130592298187, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.39452644092432093, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.20379250618355427, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.41085414309816914, - "sentence_nr": 8 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.310679343206099, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4471183729584148, - "sentence_nr": 8 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.2334787866969297, - "sentence_nr": 8 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3621517589760531, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5866873582151947, - "sentence_nr": 8 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.46269559069048716, - "sentence_nr": 8 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.46872641361415845, - "sentence_nr": 8 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.10434360980785336, - "sentence_nr": 8 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3012789660952507, - "sentence_nr": 8 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 8 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 8 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.13835317113453516, - "sentence_nr": 8 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.16343842313572918, - "sentence_nr": 8 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3986641525285075, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.30890092021323623, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.5553909583113487, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.1690979933029136, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.3751861276375209, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.2840563956846642, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.5110250591004448, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.19920413481788912, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.42537796926163113, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.20401796878756984, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.43317630453631556, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.2044887070217883, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.38471585132587544, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.2980504190448601, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.5101268920225042, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.9436043261706615, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.9880191679951993, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.18831933500600306, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.4318025704181776, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.21544027588567594, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.5040038440508637, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.26970223719007375, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.5172978597562362, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.30630098078522544, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.5439056051092116, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.19850842371858787, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.43584341835040474, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.20170335119323748, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.3541251997977811, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.10508106635796587, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.3182774828667731, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.2309552734743087, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.43975656978777905, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.23530033724858213, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.46208607300298377, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.37284027455688556, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.5528347504734102, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.2887308472548599, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.38846174119508314, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.15487293534817623, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.39293494862736383, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.21741853044139284, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.3535910166292039, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.33626819961829335, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.5466581859383387, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.32000331642122953, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.5480591855923784, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.21132630077912357, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.4175670766052166, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.13108369255325433, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.3929302741911199, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.20174045447955946, - "sentence_nr": 8 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.33729298835089516, - "sentence_nr": 8 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.20972571494011877, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.395894071208527, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.35369375385786006, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.13087682931309413, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.19462952976787054, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.013538497707846785, - "sentence_nr": 9 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1570208067577934, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4113045280468524, - "sentence_nr": 9 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.15082713742973322, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3965911699770542, - "sentence_nr": 9 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.15471428129658016, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4580211317461481, - "sentence_nr": 9 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.18928475425929295, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4916060435820526, - "sentence_nr": 9 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.21940429389247643, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4343280866601455, - "sentence_nr": 9 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 9 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1824401863423467, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.36709433185688595, - "sentence_nr": 9 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3377854698776805, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.521201229892482, - "sentence_nr": 9 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.12475846123062707, - "sentence_nr": 9 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.27823340731817514, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.10759927692349745, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.21065794536310511, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.07843772989359644, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.1324578891826276, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.08163977068875294, - "sentence_nr": 9 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.09047502044256338, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.21669141850731985, - "sentence_nr": 9 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.10322985794794913, - "sentence_nr": 9 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.24491122482530842, - "sentence_nr": 9 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.11809057094812304, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.27930342777387007, - "sentence_nr": 9 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.21268444697113978, - "sentence_nr": 9 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3229997133764549, - "sentence_nr": 9 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.1475503033983142, - "sentence_nr": 9 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.22104108935973044, - "sentence_nr": 9 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.16434349396840395, - "sentence_nr": 9 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.28582614857210975, - "sentence_nr": 9 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.10085167559661873, - "sentence_nr": 9 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.23831215045289575, - "sentence_nr": 9 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.17543744527808774, - "sentence_nr": 9 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.28201016956553354, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.12274092982883021, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.3385513651938691, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.31017716089889963, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.21688283061839067, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.41775824162589076, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.12162779391619735, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.3228288840559658, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.18237599479708327, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.3740403511567824, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.12876689524369925, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.3253153379449275, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.119159749312327, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.21297942664093145, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.2036348471340078, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.3472831655579266, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.24362353508932386, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.28135849152758385, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.14482189302397735, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.2913876815877049, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.16306957103469613, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.9199349282509897, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.28112283847231073, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.1308613527030366, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.3063146286877558, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.12787395553510186, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.21931515993565381, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.1441966459257424, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.14957316612525498, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.27675048474641756, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.3780460244391623, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.12503614625842938, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.20624064341134082, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.3368893372278425, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.2961559727627133, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.12846497020051437, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.2670865602673704, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.28252374116432993, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.3549531183419122, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.26128489301072644, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 9 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.2126837065505244, - "sentence_nr": 9 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.07149097424598219, - "sentence_nr": 10 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7774075575820374, - "sentence_nr": 10 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8943538262827356, - "sentence_nr": 10 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.18639667871924825, - "sentence_nr": 10 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4540232715517938, - "sentence_nr": 10 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.8431643718744966, - "sentence_nr": 10 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9341410275694613, - "sentence_nr": 10 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 10 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.47095916883357913, - "sentence_nr": 10 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.701526330557871, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.38260294162784475, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6692418584049541, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4093629115744712, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6243156092220487, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.36703839483583006, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6725357332891145, - "sentence_nr": 10 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4322450379367835, - "sentence_nr": 10 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.697398762810304, - "sentence_nr": 10 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.41122010762096617, - "sentence_nr": 10 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6697492221087861, - "sentence_nr": 10 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.41126318495820946, - "sentence_nr": 10 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7254294465493162, - "sentence_nr": 10 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4682601513034942, - "sentence_nr": 10 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.691130012325589, - "sentence_nr": 10 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.35334199245807973, - "sentence_nr": 10 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6858610070406853, - "sentence_nr": 10 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 10 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.33061666631099795, - "sentence_nr": 10 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5343307680770133, - "sentence_nr": 10 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.38981415389445495, - "sentence_nr": 10 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.665622189515994, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.3223937524276847, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.6719135382778884, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.4466645979681496, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.714247354760266, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.6233091888805312, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.7757111039890131, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.33414322499224436, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.7159580680193959, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.6620694102966999, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.7893416551805176, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.26540383860058264, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.51610805930355, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.49335830881778164, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.7240615166053675, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.4024279293206815, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.6798070651801875, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.6153147385756811, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.8160952378322835, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.6838493012537611, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.8178509424142287, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.5169198985488462, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.7879691803533485, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.5223010192696725, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.7442134884509299, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.3885151883045163, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.6763151870864087, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.5985488590218004, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.8248561222494313, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.37163791993879014, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.6792432753943116, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.5152630372775983, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.7696821316655393, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.43521980294891405, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.7204319998551938, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.46417187236805535, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.6653227698984816, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.519124054532681, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.7733428788002137, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.5083170211670072, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.755952798269267, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.3161432307247198, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.5990810117425377, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.40980949787910764, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.7145653936496129, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.5770135999436572, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.7697316849447288, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.7030214416074754, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.8357829168322639, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.49199339399396913, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.713934780293142, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.5002824356846001, - "sentence_nr": 10 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.7029341279811726, - "sentence_nr": 10 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.029124970213905314, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.1779610499753793, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.05989397907532586, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.13539167567510446, - "sentence_nr": 11 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.03073685498855941, - "sentence_nr": 11 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.08933758530290428, - "sentence_nr": 11 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.21051269871304829, - "sentence_nr": 11 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.18854722085547196, - "sentence_nr": 11 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.1387123733773652, - "sentence_nr": 11 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.05499461839884487, - "sentence_nr": 11 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.19978068293555115, - "sentence_nr": 11 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.1388011701223677, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.1460389336009171, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.038796252164058714, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.1756002877791377, - "sentence_nr": 11 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0029868578255675027, - "sentence_nr": 11 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.015380253532528225, - "sentence_nr": 11 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.19065171436703615, - "sentence_nr": 11 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.21083781655774478, - "sentence_nr": 11 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.14590438247348272, - "sentence_nr": 11 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 11 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.04379419293412465, - "sentence_nr": 11 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.15119622228734425, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.21315318926996712, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.16991425356152365, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.22371589981083434, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.20982178138488494, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.20189358781069322, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.20261685251676126, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.226729844497646, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.18184342512086546, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.2185121523322681, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.17386106914161167, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.08272059515141832, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.1814025725787457, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.23945930551153607, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.20815933215961574, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.09886053260067004, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.14345644530149382, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.19097844728039898, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.08246021416977749, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.18868639139421345, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.20665565461558383, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.17764901410543646, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.19312651305380893, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.21371557282714232, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.18854043679878274, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.19559831357902827, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 11 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.1914895496057553, - "sentence_nr": 11 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6666935927206881, - "sentence_nr": 12 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7957561291403441, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.34999116613463505, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6356075517191035, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.48649824146709, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6763447333054696, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.367622917844187, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5615050712672139, - "sentence_nr": 12 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4081538556642202, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.46386216052527535, - "sentence_nr": 12 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4300174433641992, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5099800158255156, - "sentence_nr": 12 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7963205130973803, - "sentence_nr": 12 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8101688749569373, - "sentence_nr": 12 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6570128212612868, - "sentence_nr": 12 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6262090565616182, - "sentence_nr": 12 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5866943184579982, - "sentence_nr": 12 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6390393619950272, - "sentence_nr": 12 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 12 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 12 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 12 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.01047222192173988, - "sentence_nr": 12 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5683565265173782, - "sentence_nr": 12 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7072367582469653, - "sentence_nr": 12 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 12 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.20287366424876002, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5368464080033196, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5198707241967666, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6993305416237223, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.36603776814499195, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.45532918164901276, - "sentence_nr": 12 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.13525036115537795, - "sentence_nr": 12 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3120848453730729, - "sentence_nr": 12 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3474347870952493, - "sentence_nr": 12 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.7073395735740273, - "sentence_nr": 12 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6577952971578602, - "sentence_nr": 12 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6570128212612868, - "sentence_nr": 12 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6221526807313811, - "sentence_nr": 12 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5460462259563637, - "sentence_nr": 12 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6641829079106271, - "sentence_nr": 12 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 12 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.04884431803904408, - "sentence_nr": 12 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 12 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.18357384275951122, - "sentence_nr": 12 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.28073304156067924, - "sentence_nr": 12 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.360657984953223, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.46365764298816153, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.5757521453586436, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.3147715014841853, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.5986154863155839, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.3885646234110734, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.5051669760132699, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.464413403675355, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.6291656356697347, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.30490938758882236, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.579088460457721, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.3758073513458154, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.5302950018189692, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.29308025637967977, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.5715200997140051, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.43285599641891276, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.5551678521355665, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.25984882476296983, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.6305744214119023, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.48649824146709, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.7255446918266525, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.464413403675355, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.6853183317800515, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.34999116613463505, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.6356075517191035, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.4426623526629488, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.612058732370435, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.5522004843736675, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.6166558670381421, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.37954187220913477, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.5550325994532472, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.3147715014841853, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.521228891025682, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.3964513253420688, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.6095420129111676, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.36033217429111203, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.5550014071110869, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.33403925633579773, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.5753930328058733, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.44882520213790794, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.5856175239899348, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.42760828727369016, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.6065010489098535, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.33403925633579773, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.5915394296427854, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.3212785834179169, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.6158121620368939, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.1751489536280261, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.378593296276962, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.3214110553053944, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.49232390716994445, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.479033905070678, - "sentence_nr": 12 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.5975149526416976, - "sentence_nr": 12 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.13150403915662862, - "sentence_nr": 13 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.21177549089429396, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1424915360855107, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.23985076149753726, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.13309638637723345, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.18696197122203645, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.12256515595630638, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.23303109995893123, - "sentence_nr": 13 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1419886619859991, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.24113733359485448, - "sentence_nr": 13 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1324448705928064, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.22863839042697148, - "sentence_nr": 13 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.12017886776600228, - "sentence_nr": 13 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.20794486026487116, - "sentence_nr": 13 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1164257728844972, - "sentence_nr": 13 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.19249901344360867, - "sentence_nr": 13 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.12325384013681445, - "sentence_nr": 13 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.1960232617116645, - "sentence_nr": 13 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.12806473847444227, - "sentence_nr": 13 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.20054688779645718, - "sentence_nr": 13 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1345714227066951, - "sentence_nr": 13 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.21078968525268058, - "sentence_nr": 13 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1802615495980454, - "sentence_nr": 13 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.19630112442374525, - "sentence_nr": 13 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.8212614342207556, - "sentence_nr": 13 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7876222308170935, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5888582552569348, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7876222308170935, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5888582552569348, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7876222308170935, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6871546336787117, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6871546336787117, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.43550490048931545, - "sentence_nr": 13 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6419345531187637, - "sentence_nr": 13 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.17539593635425982, - "sentence_nr": 13 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3139104155809725, - "sentence_nr": 13 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.39225487001250453, - "sentence_nr": 13 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5189967318357492, - "sentence_nr": 13 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.12859070457371286, - "sentence_nr": 13 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.22162336097079333, - "sentence_nr": 13 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6871546336787117, - "sentence_nr": 13 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6258765997974801, - "sentence_nr": 13 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6680248455809015, - "sentence_nr": 13 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6258765997974801, - "sentence_nr": 13 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6722124517361844, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.17023327167529265, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.2521455524828544, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.13150403915662862, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.2229548791980166, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.15247670030930355, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.1324448705928064, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.23382021475411732, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.09766807787022613, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.16788063248730647, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.12111615182138995, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.21505717177216926, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.09979796185764318, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.1310501345458609, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.11512937599552589, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.1852451960926282, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.126642985054506, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.20913543330915318, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.12632059501697884, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.22490978846607526, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.1352612651586241, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.22176710342008016, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.18982400330057914, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.11760179026027952, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.19531596229980544, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.09968269909242322, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.14510210137368384, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.1204925245474865, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.12192273449574796, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.18177358407861108, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.16841504132177978, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.10667790151233097, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.17427579502643556, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.1508875367739971, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.20889434105456664, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.13184959768302618, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.30505662513933907, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.09878901581794378, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.1651800705978423, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.13150403915662862, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.20736628090200235, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.11824658049755846, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.2047497542808756, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.1461072488843534, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.1946917085815184, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.1018151014848322, - "sentence_nr": 13 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.14524830913329922, - "sentence_nr": 13 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.2589080403198245, - "sentence_nr": 14 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2189767496390278, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.09761931247072746, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1397102655312677, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.1326689502117876, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.167569694983793, - "sentence_nr": 14 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.15848968577272604, - "sentence_nr": 14 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.24447662789322752, - "sentence_nr": 14 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.20665940380705064, - "sentence_nr": 14 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.18243716955007858, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.16168125580314086, - "sentence_nr": 14 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.2450013599045987, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.20901732384345645, - "sentence_nr": 14 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.20222677481313764, - "sentence_nr": 14 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.18492694642397273, - "sentence_nr": 14 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.18243716955007863, - "sentence_nr": 14 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.16667457585564618, - "sentence_nr": 14 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 14 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.08556679632324991, - "sentence_nr": 14 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1575852366903021, - "sentence_nr": 14 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.1474874322154398, - "sentence_nr": 14 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.9202663016973823, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9263876898254182, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.8621431910551439, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8363304387269249, - "sentence_nr": 14 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.9419492177147062, - "sentence_nr": 14 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9202237383102091, - "sentence_nr": 14 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6656058483395763, - "sentence_nr": 14 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6306557167105028, - "sentence_nr": 14 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.8657947138469048, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8367521498141209, - "sentence_nr": 14 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.9419492177147062, - "sentence_nr": 14 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9202237383102091, - "sentence_nr": 14 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6993348038140574, - "sentence_nr": 14 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6335836519040372, - "sentence_nr": 14 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 14 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 14 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.9419492177147062, - "sentence_nr": 14 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9202237383102091, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.11064738383914807, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.12449466772796605, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.12222372495044852, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.12383047729216191, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.1392580908972882, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.1333265070823728, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.07717159074475938, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.09413026539458375, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.16807498532991816, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.16404257857373192, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.21005284223037346, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.1679703861465872, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.0951509584925814, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.12014553061064691, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.11737915185320068, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.10085050674562507, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.11377195287577829, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.1301681094143453, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.09455636771034115, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.11463120929696417, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.1544787887603271, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.1384236976807813, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.11488572123868507, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.1455973492295447, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.13735441291745387, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.20255423961944058, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.205408273869532, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.11470196605012067, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.0960438892364715, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.07184436307032757, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.20378989148152887, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.16337212771611656, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.09669863605676213, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.10886215421099144, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.18171364159867548, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.16245793974098002, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.07562263205281951, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.09819928715831736, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.1430606569063152, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.128073928655324, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.09526781380423786, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.18223449608285797, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.17127401148639734, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.09855718610544388, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.09669863605676213, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.11679541132562438, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.09643517424337235, - "sentence_nr": 14 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.1226126790254367, - "sentence_nr": 14 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3969253441303859, - "sentence_nr": 15 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.43277080710930865, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.26887073704667247, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2918476164856665, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5183146371291372, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5942793492554739, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.028864519535915668, - "sentence_nr": 15 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.13535086012687783, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.29687399422087424, - "sentence_nr": 15 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.002376388269368755, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.04574695485583133, - "sentence_nr": 15 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.2703094106380642, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2982249908859, - "sentence_nr": 15 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.29313061087267483, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.30295384730328956, - "sentence_nr": 15 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.284911205299835, - "sentence_nr": 15 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.32067889250923776, - "sentence_nr": 15 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.29353055611145706, - "sentence_nr": 15 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3381266475327612, - "sentence_nr": 15 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.09910529437987022, - "sentence_nr": 15 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2511990291834263, - "sentence_nr": 15 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.295394335805579, - "sentence_nr": 15 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.362515947701148, - "sentence_nr": 15 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.7243776840931383, - "sentence_nr": 15 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8980107630353439, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.9325718821645923, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9490053815176721, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6653044831075519, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7986980418662383, - "sentence_nr": 15 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.8504591592783618, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8980107630353439, - "sentence_nr": 15 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5950322600507224, - "sentence_nr": 15 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7090542316843602, - "sentence_nr": 15 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.44768974737795825, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.45520472994232203, - "sentence_nr": 15 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6509298345623671, - "sentence_nr": 15 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7962234681835563, - "sentence_nr": 15 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.41813929088914065, - "sentence_nr": 15 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4779008399806691, - "sentence_nr": 15 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.7243776840931383, - "sentence_nr": 15 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8642805496461259, - "sentence_nr": 15 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.9506885335787997, - "sentence_nr": 15 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9606382935593174, - "sentence_nr": 15 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.8665175293126633, - "sentence_nr": 15 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8642805496461259, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.342569723746894, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.47156710056973744, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.2319934375578505, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.3367678538644817, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.2261681529206079, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.2647144854968396, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.35554722872430145, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.38873710544604445, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.3720000272862786, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.44695658930348453, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.4118588818865406, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.48573453292579605, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.2998354233286452, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.41144215385645566, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.42142495511264777, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.49708063531780444, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.33296735510279596, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.4176386300927819, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.32522259162581857, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.3572499606049779, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.3449668516380805, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.4341194278942322, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.36161896085795575, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.5052818563161547, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.2798191316489921, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.31866179281073254, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.3170440263520106, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.45327673850268096, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.15538140800156827, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.22365453282977818, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.1352815632479558, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.2610624350708668, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.35907597395908514, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.433310273977633, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.33498522957587384, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.4529680464694055, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.30675389390381064, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.49190118767827684, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.13922661372145656, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.27553494979330584, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.3515170550015674, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.37881852198491145, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.30950829536527374, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.3839157172568008, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.22141947821999777, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.3633108862011865, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.2957849631521743, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.2872269269040579, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.19474118932727338, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.3257294949902081, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.26505727008662233, - "sentence_nr": 15 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.41342120940573923, - "sentence_nr": 15 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5489548889989204, - "sentence_nr": 16 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5292552311493306, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.43141660874998483, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4251732952639193, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.479859141564773, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.47978767796651084, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.2751349202729036, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.311148395820729, - "sentence_nr": 16 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5435154526669127, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5395341377171525, - "sentence_nr": 16 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5777979902630328, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6331337405946555, - "sentence_nr": 16 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6121338866063298, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6222767269627676, - "sentence_nr": 16 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5440627210252523, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5801365308278273, - "sentence_nr": 16 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5097049681318312, - "sentence_nr": 16 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5622473457673939, - "sentence_nr": 16 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.42567378467735034, - "sentence_nr": 16 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.470165978205223, - "sentence_nr": 16 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.47594607773277786, - "sentence_nr": 16 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5363851621507516, - "sentence_nr": 16 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4533373633026252, - "sentence_nr": 16 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5042718376547173, - "sentence_nr": 16 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.8509306641805077, - "sentence_nr": 16 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9162670716850285, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.9682566771439106, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9779127328168863, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.7040822331405046, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7673268835807536, - "sentence_nr": 16 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.8509306641805077, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9162670716850285, - "sentence_nr": 16 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.7639225615341296, - "sentence_nr": 16 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8135226479972402, - "sentence_nr": 16 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6729400620282456, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7157738382386983, - "sentence_nr": 16 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6736973998414632, - "sentence_nr": 16 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7157738382386983, - "sentence_nr": 16 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.7640211005075139, - "sentence_nr": 16 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8179683170395244, - "sentence_nr": 16 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 16 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.8509306641805077, - "sentence_nr": 16 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9162670716850285, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.43141660874998483, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.45005622460103567, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.5269212212163125, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.5528502361092263, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.6736973998414632, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.7157738382386983, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.33491174038847354, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.3646077683106875, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.450293182440332, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.4822292034174927, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.19834633509680927, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.2712763621688402, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.546749262754264, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.5830342194369027, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.2754139367364165, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.34665831783057166, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.42877544777223947, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.43803970127356867, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.43908893511874636, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.4785460996828672, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.5898466143484524, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.6611594562951559, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.44701416909786756, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.5245065297475329, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.31417347869916407, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.3530975487930333, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.6373258340947424, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.6437421244363288, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.4715455630189013, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.543275675805182, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.2807304798995431, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.3418543172008782, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.5397682182130759, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.5703951757357331, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.5446420954986508, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.5662782206307382, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.3378721588486122, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.4362453299175689, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.49288474585647657, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.5578180330951528, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.36197274748300795, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.36134314178088084, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.17060055774694924, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.2566677182784047, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.5717883675148524, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.640780099960748, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.221071468018936, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.41620491059292214, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.4263215396273059, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.3711481893609263, - "sentence_nr": 16 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.4101392170618868, - "sentence_nr": 16 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7838756540325346, - "sentence_nr": 17 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8813081534414112, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6486802664285581, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8066891982024211, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7344798528986015, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8855631322316195, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6486802664285581, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8585894188661937, - "sentence_nr": 17 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7838756540325346, - "sentence_nr": 17 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8813081534414112, - "sentence_nr": 17 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.8434569599214109, - "sentence_nr": 17 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9123500588239437, - "sentence_nr": 17 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7849324644314795, - "sentence_nr": 17 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8934780380564308, - "sentence_nr": 17 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7838756540325346, - "sentence_nr": 17 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8799941663695641, - "sentence_nr": 17 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 17 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6809354000776107, - "sentence_nr": 17 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8640242853252401, - "sentence_nr": 17 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7838756540325346, - "sentence_nr": 17 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8841725044915145, - "sentence_nr": 17 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.39503194300684213, - "sentence_nr": 17 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6916289318228928, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3094285625931604, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6328843883953666, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.30888995556875376, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6801864286113619, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5512199399393973, - "sentence_nr": 17 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.45862256824436665, - "sentence_nr": 17 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7660160731572102, - "sentence_nr": 17 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.47770079267358434, - "sentence_nr": 17 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8053780976175922, - "sentence_nr": 17 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6259358824502687, - "sentence_nr": 17 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8067950339997761, - "sentence_nr": 17 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5296344689827603, - "sentence_nr": 17 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7183083787484315, - "sentence_nr": 17 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.7568440125092788, - "sentence_nr": 17 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8347576899702969, - "sentence_nr": 17 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3001800600660342, - "sentence_nr": 17 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6794930944968381, - "sentence_nr": 17 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.18879642915927602, - "sentence_nr": 17 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6584653291380502, - "sentence_nr": 17 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4697979053121435, - "sentence_nr": 17 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7554660353280213, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.3164389365959547, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.7121929522648841, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.6031798395521694, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.7819677495994619, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.5646631238098637, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.836206348617966, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.36615107686578496, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.696074520676609, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.1543252261021413, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.4932064977882042, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.6966863379186454, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.7941296295595748, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.5487584440377526, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.8692797308530646, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.8787142254774354, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.944457825946867, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.5463887965663883, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.7033378749149323, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.4912217876159168, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.7991339910300419, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.7251215108320924, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.8334871013677937, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.587725019570444, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.7957550794048827, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.28856268147560865, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.6187787024786685, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.4402122771181734, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.7716344099519011, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.18465966669442654, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.503938463452404, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.17973438065210462, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.5509051817440759, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.4809103179432793, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.7499547288317748, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.6244070585346295, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.8433626077474702, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.43660156107563336, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.7165816705519701, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.3748533897614559, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.6863935447402433, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.3607442374649342, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.6876955247522804, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.3718491333506089, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.6941552634040441, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.5110976370499285, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.842915559657988, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.5591535564944223, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.8079980831297509, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.11809858631445573, - "sentence_nr": 17 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.5943886568930294, - "sentence_nr": 17 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1423170365140828, - "sentence_nr": 18 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.38605131339325, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3230989128220882, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.13860487750886114, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.36659667376085786, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.36295227908523897, - "sentence_nr": 18 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.13860487750886114, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.36118801210741663, - "sentence_nr": 18 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.40877861250593944, - "sentence_nr": 18 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.16673024281943524, - "sentence_nr": 18 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3975048254243706, - "sentence_nr": 18 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.11262865194228103, - "sentence_nr": 18 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.36030161445252334, - "sentence_nr": 18 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3066941236048102, - "sentence_nr": 18 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 18 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 18 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.409404483413751, - "sentence_nr": 18 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3629681915617596, - "sentence_nr": 18 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.12162779391619735, - "sentence_nr": 18 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4136500403395244, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.1909693288724605, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4115524982336727, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.14192760409508295, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3989311390496819, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.20304460086424203, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4966336271433132, - "sentence_nr": 18 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3935462418730863, - "sentence_nr": 18 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.33523829330170474, - "sentence_nr": 18 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3250861966671464, - "sentence_nr": 18 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3051626462022859, - "sentence_nr": 18 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.30944349609311117, - "sentence_nr": 18 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.11556522074454477, - "sentence_nr": 18 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.372688132616477, - "sentence_nr": 18 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.22392361812003433, - "sentence_nr": 18 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.460938469666163, - "sentence_nr": 18 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.10704943109718215, - "sentence_nr": 18 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.362953271903766, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.14392660099814805, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.376362134090542, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.11718316363212337, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.3844506520287143, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.4024646900219184, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.08197539732074254, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.35287478964221025, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.3502198678697797, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.376636825008991, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.30372034137078635, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.21481172921264619, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.4009028477501074, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.15065778147399764, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.4580508275161034, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.21281360709834968, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.4292702902558381, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.13780534982274106, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.3273034480518148, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.36078900962911326, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.2491467453273127, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.47986445165634506, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.10905122148101043, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.4502571446121065, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.274959074733397, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.3607206140473947, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.17796237395371306, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.48209511527864385, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.13644487773607678, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.36491236604183974, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.21850577875478958, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.4494281444270959, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.31361999490423276, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.1222354265296326, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.3727252294250617, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.1109484758001971, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.3612426584883393, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.20356858406857398, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.46358366365120834, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.11530762783711283, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.3781690117672006, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.11907182322580316, - "sentence_nr": 18 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.49599003474365394, - "sentence_nr": 18 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4220964985804286, - "sentence_nr": 19 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4455062898838481, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.32026140564476524, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4016870075045671, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.34697616124581016, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.40373943351486685, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 19 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4184617303786878, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4321132548050678, - "sentence_nr": 19 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3499900041521066, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3822330369569219, - "sentence_nr": 19 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4220964985804286, - "sentence_nr": 19 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4455062898838481, - "sentence_nr": 19 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.41428013900466737, - "sentence_nr": 19 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.425713879206717, - "sentence_nr": 19 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4184617303786878, - "sentence_nr": 19 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4321132548050678, - "sentence_nr": 19 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5102296603076779, - "sentence_nr": 19 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5412065437629714, - "sentence_nr": 19 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.32282559495424096, - "sentence_nr": 19 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.38266426308756574, - "sentence_nr": 19 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4230074457298372, - "sentence_nr": 19 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4432451111759523, - "sentence_nr": 19 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6363676859401174, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6744544901797789, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 19 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.9271746317040298, - "sentence_nr": 19 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9736668125871423, - "sentence_nr": 19 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6986939462620247, - "sentence_nr": 19 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7821077250864037, - "sentence_nr": 19 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 19 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.9184678024441792, - "sentence_nr": 19 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8884834862973964, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.3797391466432489, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.3481158447116987, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.31102805827817165, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.3375837027261476, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.19710660977672484, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.2646181750020499, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.3797391466432489, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.3274816319655301, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.28493958837889694, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.35876163607595707, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.2485364833746714, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.2873862688213756, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.41664461891968263, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.42600414573009276, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.2710684964643971, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.2982841390442802, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.23005567239800093, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.29184715566281483, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.2741455993358603, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.36403543443534025, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.34279101776553306, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.42600414573009276, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.30955822779938535, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.39546682876478195, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.39475108115635776, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.42154888635191134, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.2781617026804374, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.32302333182207527, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.08473168573832755, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.25650903369815853, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.2883871807684295, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.21660761852515356, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.25414220830184964, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.32910644083871465, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.29306886812256966, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.18084108219203518, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.27583433958197495, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.25612947694888455, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.3002607987321696, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.3216291288446239, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.4272249853925079, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.188590266789637, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.26177705380820604, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.3308736026652116, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.3875427536757155, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.28432597056103653, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.35944124408933287, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.23631465024334478, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.2692006325646732, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.259615032947222, - "sentence_nr": 19 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.2855780701161316, - "sentence_nr": 19 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.31343233007308363, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.28662182336952924, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.289946670354745, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2585958231966256, - "sentence_nr": 20 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1574562620502688, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2833933092608246, - "sentence_nr": 20 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2330649391612961, - "sentence_nr": 20 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2858508520944113, - "sentence_nr": 20 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.17248469309075373, - "sentence_nr": 20 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3673041887389201, - "sentence_nr": 20 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.28838937143148047, - "sentence_nr": 20 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 20 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.25480888745972646, - "sentence_nr": 20 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.14839290005301392, - "sentence_nr": 20 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.29565285341782266, - "sentence_nr": 20 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.22266775943086, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.10508106635796587, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.2504422832248121, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.22563365567811913, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.12913533075470382, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.24776496881674256, - "sentence_nr": 20 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.08680476715745516, - "sentence_nr": 20 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.22066482174709295, - "sentence_nr": 20 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 20 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 20 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.12117880855911824, - "sentence_nr": 20 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.32137825349405363, - "sentence_nr": 20 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 20 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 20 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.20104685618767446, - "sentence_nr": 20 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.25137213099939626, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.28372673673489807, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.164799256779143, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.32187376249458133, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.2969522070783606, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.14440617372843148, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.27200704330334224, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.2442053369522631, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.33050427873462274, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.2133219421911448, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.3424665224706109, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.8944054777319608, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.24197054442617688, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.21682999057776514, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.3722897460532404, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.24424323100599224, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.2205591704292585, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.3479467223515336, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.1926917267834754, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.4545444680350158, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.17580772500133016, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.32957763052496886, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.2148084015365523, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.40974307981059804, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.29622141199363383, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.24146688269469918, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.09958408398703665, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.22890983822248492, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.20795712301883962, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.282761705091657, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.2551114536415265, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.18112053860965763, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.3266298821510716, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.1423412184218882, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.26467729752192487, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 20 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.286072901441292, - "sentence_nr": 20 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2851456053265138, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.09858834583812252, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7445389400758123, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9134769668037408, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2506297252541463, - "sentence_nr": 21 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.8320381765431424, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9129044064886581, - "sentence_nr": 21 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.23443139907396643, - "sentence_nr": 21 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 21 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 21 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.29972668857564216, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.12409597120849801, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.2928237514438983, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.15083364266523736, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.38662429787924074, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.22849324967229787, - "sentence_nr": 21 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.44152236347960977, - "sentence_nr": 21 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.2989569143807341, - "sentence_nr": 21 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4042166909648807, - "sentence_nr": 21 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.3423939053207622, - "sentence_nr": 21 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.17611268473423294, - "sentence_nr": 21 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.20441543914149457, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.18928624746011372, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.43639616127375797, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.29213008358451265, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.5828788445270403, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.14679869139754204, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.4021419566569229, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.329340597116918, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.6347143291802012, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.2868708266227936, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.5779499593492363, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.3436610762802303, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.2782087319667435, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.632418768195088, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.3083012995502152, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.6392851743718383, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.23050898626566632, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.48172150010681464, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.1969221590285716, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.5644899370701738, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.32594818888335836, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.49646222671189383, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.4604008032403599, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.7444026788985108, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.29161716271402766, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.47302621872495865, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.6854823532900025, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.3546725638586892, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.21468316165048362, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.6851126041819388, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.250737833894674, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.40017617077306594, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.27204995504877727, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.2743963944428051, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.6341922683775969, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.7252122374710612, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.12586347848916266, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.3554854950683664, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.3889045463729729, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.20229280648000492, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.6194717199605934, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.31114459650134146, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.11856660123276004, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.34601719602607445, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.40072710492884706, - "sentence_nr": 21 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.7206046648616748, - "sentence_nr": 21 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.22174147515312165, - "sentence_nr": 22 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2117279815687756, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.33999170096577974, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.29221353951377876, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3058731661111107, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2747352174231836, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.42736771185803385, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.39727964545172, - "sentence_nr": 22 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.10975022749274138, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.13904829787402162, - "sentence_nr": 22 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.2873518361947954, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.24505805183333226, - "sentence_nr": 22 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.33495074569972355, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3454509072842772, - "sentence_nr": 22 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.43090467385890824, - "sentence_nr": 22 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3813511699401743, - "sentence_nr": 22 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 22 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.22765977642995502, - "sentence_nr": 22 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2247283208344801, - "sentence_nr": 22 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.30931906627981315, - "sentence_nr": 22 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2527893205238235, - "sentence_nr": 22 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.9210500207490827, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9069369532463243, - "sentence_nr": 22 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4607778969984477, - "sentence_nr": 22 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8103868370118212, - "sentence_nr": 22 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 22 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4885014761119101, - "sentence_nr": 22 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.827819363745503, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.17903870455040152, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.18440575845606422, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.1981763713215807, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.2520139548059959, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.17499310607879404, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.18175908515502465, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.10089587713517954, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.11552870044063634, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.3168035112884022, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.30580678632835573, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.10825039887617824, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.1278708456868984, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.29705138694670025, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.2780223931578523, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.18986262747887736, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.18230825914917978, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.42442305789888696, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.42734795538422576, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.18781316135387768, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.16808430602651067, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.3454156644973841, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.30446460704247824, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.4479597674250984, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.41132840401983517, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.10704445941620296, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.13527356658034445, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.09941527806251362, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.13609735884978696, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.19230259308735756, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.22211286692050705, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.20383889880388334, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.17813562619757226, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.2986551380628858, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.30308773908860176, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.10536111661637193, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.13679626017050403, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.19732230687816163, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.22765162763479738, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.3987203877706927, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.39992851145514274, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.09467800236923245, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.11434380596647938, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.15034676904545285, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.36138016740101575, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.31224382417562974, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.345966570287759, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.2816115803298224, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.3461146475963348, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.30131374176129855, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.1552102601937674, - "sentence_nr": 22 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.1381803727119777, - "sentence_nr": 22 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4967067363118649, - "sentence_nr": 23 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6330776418175281, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.39501632817024007, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5629116515332234, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.44774758283371513, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6323151453499094, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3353166764160673, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5279751808070301, - "sentence_nr": 23 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3340392563357978, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5542299582982266, - "sentence_nr": 23 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.2288355034549531, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.469883747317403, - "sentence_nr": 23 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5472915485853102, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7136367183558585, - "sentence_nr": 23 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6159995640523437, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8398584608765305, - "sentence_nr": 23 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5155625728615272, - "sentence_nr": 23 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6435263800797054, - "sentence_nr": 23 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 23 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 23 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 23 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.32206162101132135, - "sentence_nr": 23 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.24125880497129865, - "sentence_nr": 23 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.47825499190432214, - "sentence_nr": 23 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3292010361291119, - "sentence_nr": 23 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5670300297444607, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.46086624699736534, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6510894943437193, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5511532346688224, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7550305399541021, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.34537865578685034, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5956718372193373, - "sentence_nr": 23 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.39080227521872696, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.621048393466749, - "sentence_nr": 23 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.2755396296659942, - "sentence_nr": 23 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5033588333252278, - "sentence_nr": 23 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5795086255869999, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7183582779188291, - "sentence_nr": 23 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6214211316495574, - "sentence_nr": 23 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7844755306149331, - "sentence_nr": 23 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6008383045972477, - "sentence_nr": 23 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7291842011448325, - "sentence_nr": 23 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 23 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 23 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 23 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.25418196696822093, - "sentence_nr": 23 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 23 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5117784549266909, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.274941620352113, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.4651004879148919, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.22743363869750483, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.5634710936922129, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.518836150464752, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.6242496691584447, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.5989032124636781, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.7291306908177887, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.2677353447271197, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.4460422364967209, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.3558785149067877, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.570837784052645, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.2624310277292268, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.4915471393606767, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.28489318277723963, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.6000278331909762, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.5728668995816387, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.7460634178179616, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.46086624699736534, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.6510894943437193, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.46507550803536196, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.6687857543858925, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.5155625728615272, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.6435263800797054, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.3639412530979476, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.654342605671994, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.19882981891203355, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.45714526865696425, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.32269274420690436, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.49704406859630557, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.42849655626964983, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.662646931303495, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.3937441173550755, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.5600824723479425, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.23114663823833642, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.5107406700140826, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.419793811546288, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.6152785242440109, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.46086624699736534, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.6510894943437193, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.46507550803536196, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.6687857543858925, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.2296660762967038, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.5259172094145851, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.39501632817024007, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.5505822266189535, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.3215000448278979, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.5947774549102596, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.20870371467330825, - "sentence_nr": 23 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.40726160697608454, - "sentence_nr": 23 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3460579711860666, - "sentence_nr": 24 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.43910565102067395, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.16692770661327389, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2940239540182693, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.17589867762235817, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2991014535844428, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.15568794672327907, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.30284457998681635, - "sentence_nr": 24 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.17340302865304977, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.28581037214602456, - "sentence_nr": 24 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1782509297990519, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.28710039249342334, - "sentence_nr": 24 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4901491669500622, - "sentence_nr": 24 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5638035394617603, - "sentence_nr": 24 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3460579711860666, - "sentence_nr": 24 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4260473803699743, - "sentence_nr": 24 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.2011131382865372, - "sentence_nr": 24 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.36314253622836745, - "sentence_nr": 24 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.17598839092477797, - "sentence_nr": 24 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.28650792027744043, - "sentence_nr": 24 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 24 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.15997462319973554, - "sentence_nr": 24 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.24731742205813823, - "sentence_nr": 24 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3980108204104697, - "sentence_nr": 24 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5611872124508993, - "sentence_nr": 24 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7431443902355421, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4465866985385432, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6260699913485588, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4465866985385432, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6260699913485588, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.25509991414681377, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.505614827211273, - "sentence_nr": 24 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.21452424426866915, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.44780791445343104, - "sentence_nr": 24 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.23857086413632697, - "sentence_nr": 24 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.47971483823439903, - "sentence_nr": 24 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.446411600799131, - "sentence_nr": 24 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5816697577563045, - "sentence_nr": 24 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4664526119731094, - "sentence_nr": 24 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6399376431552989, - "sentence_nr": 24 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.20156032858716424, - "sentence_nr": 24 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4855075115512445, - "sentence_nr": 24 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.1526900266679129, - "sentence_nr": 24 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.41716995830580594, - "sentence_nr": 24 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 24 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.23259933287371404, - "sentence_nr": 24 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.20835831728362864, - "sentence_nr": 24 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.49812931259693377, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.17334119484500185, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.31463785312250736, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.12522096513057643, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.335302418196347, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.9100527513271326, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.9584484214161733, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.20156032858716424, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.35007862377558696, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.3449632275226908, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.5000457205552167, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.15568794672327907, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.30284457998681635, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.1529699053146309, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.35702516223197556, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.15975615838102766, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.16928451900289662, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.40173762794247314, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.15568794672327907, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.30284457998681635, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.16038844415635037, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.30359085570641314, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.3595283251171754, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.5790446318474887, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.20563705341552085, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.3762774944524412, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.16692770661327389, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.2940239540182693, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.14165832410287266, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.24107149684266257, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.1258646065963102, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.24857006332411635, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.2519649154562495, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.44974180175388206, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.3253958243003269, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.45173371737296786, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.27618177741751665, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.4305107132988055, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.1683625745315614, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.31167225759119427, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.13728361101885644, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.3436250633828196, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.16353712933127018, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.32934735468962634, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.48680589893384085, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.6190257724123215, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.1551293035275564, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.2674082220133274, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.26091874007348304, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.17598839092477797, - "sentence_nr": 24 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.28650792027744043, - "sentence_nr": 24 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1332399603607437, - "sentence_nr": 25 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.19971937750838645, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1834283688193615, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.22588088032876846, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.12425342874478343, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.1660533764831914, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.15538689193055893, - "sentence_nr": 25 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.14158209035366248, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.1869416235999822, - "sentence_nr": 25 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0578819658044546, - "sentence_nr": 25 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.16170596160446446, - "sentence_nr": 25 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2206817446345091, - "sentence_nr": 25 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.14914968848461002, - "sentence_nr": 25 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.21702090583674813, - "sentence_nr": 25 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.138685682297543, - "sentence_nr": 25 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 25 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 25 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.1258687317121735, - "sentence_nr": 25 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.1327332961698289, - "sentence_nr": 25 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.23556366957615363, - "sentence_nr": 25 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.22894370639738668, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.16684195647378827, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.21420692177337528, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.33150414660895594, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.30808679013173407, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.23556366957615363, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.25521078373566897, - "sentence_nr": 25 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.14257880024595157, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.1979524022915653, - "sentence_nr": 25 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.14257880024595157, - "sentence_nr": 25 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.1979524022915653, - "sentence_nr": 25 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.260711748598298, - "sentence_nr": 25 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.28143225165615565, - "sentence_nr": 25 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.25621420675166556, - "sentence_nr": 25 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.32613185963061736, - "sentence_nr": 25 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.21310996044302127, - "sentence_nr": 25 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.2620829676028965, - "sentence_nr": 25 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.08892786873926031, - "sentence_nr": 25 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.14069122234920528, - "sentence_nr": 25 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.12273033502938982, - "sentence_nr": 25 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.15070376710164984, - "sentence_nr": 25 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.17376029392152273, - "sentence_nr": 25 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.22421987263715565, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.07369293827420972, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.08728042965046878, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.12416744870990627, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.06452498627127952, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.09758509152849626, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.09985298970743903, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.22158794642706012, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.20787168962643957, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.05401240601013853, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.07243671671799473, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.1543646468773244, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.09348998462584433, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.130990604448226, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.09885362316286796, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.15900429623613993, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.10903227170832805, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.11481934989482791, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.1745453831609756, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.046916282267844764, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.1250076305588977, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.12985392271660248, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.06737080019124615, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.18629057860741663, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.1504281768235603, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.11099491388125307, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.1201070010200949, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.08702826664587757, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.42262353460370816, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.3966051357904673, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.09612004569821603, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.10249207815381514, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.1341907303110576, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.11635402454082566, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.1636348970852316, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.06028131279303415, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 25 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.0901676620993871, - "sentence_nr": 25 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6983671476675032, - "sentence_nr": 26 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6697193437120026, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5809024483660724, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5409616569206442, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5893051076561628, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.555242666304663, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5197038614969076, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4944106522194635, - "sentence_nr": 26 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5863087308455573, - "sentence_nr": 26 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.5756247354842696, - "sentence_nr": 26 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.44763438063632005, - "sentence_nr": 26 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4327706284829231, - "sentence_nr": 26 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4562933372999328, - "sentence_nr": 26 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.4354000091116894, - "sentence_nr": 26 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.650945489442927, - "sentence_nr": 26 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6025447507087655, - "sentence_nr": 26 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5040260890269513, - "sentence_nr": 26 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.48159079549233025, - "sentence_nr": 26 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3966338449810425, - "sentence_nr": 26 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.3940867714969907, - "sentence_nr": 26 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3186669369694382, - "sentence_nr": 26 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.34867169182256896, - "sentence_nr": 26 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6350785093832516, - "sentence_nr": 26 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6188888500556722, - "sentence_nr": 26 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.7997394936755756, - "sentence_nr": 26 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7811228513409922, - "sentence_nr": 26 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.9660854289024723, - "sentence_nr": 26 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.9613867167137871, - "sentence_nr": 26 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.7158159753911548, - "sentence_nr": 26 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7127947486849641, - "sentence_nr": 26 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6813410498464633, - "sentence_nr": 26 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6671821168913319, - "sentence_nr": 26 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 26 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.45066539224706753, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.4254592023616511, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.45779216736532874, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.40945502186629257, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.40071581088356767, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.36844216279073794, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.14609848125563302, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.18504017619904287, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.4184317523303411, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.40500270963162277, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.4125433652059801, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.3955923992862865, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.47182538941865537, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.42450279333172475, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.46492333059956836, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.4401112788616263, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.3967795858478363, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.3803134453035716, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.34915707707242977, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.34988691421168616, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.2613611691981996, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.2740054517113319, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.5600863252474344, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.5179797138258272, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.3461243385522883, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.3560268535895035, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.43650008892828823, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.42551924250056755, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.504580863725975, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.46703102558879955, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.36954961729302616, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.34760122558190465, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.3803026331533805, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.362200056491149, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.24777987943516128, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.2952194113831596, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.5258092834799059, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.4981801549352249, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.48625052891235754, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.4290939038872796, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.4045007320789693, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.4098113348256027, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.44158642009003995, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.40903259597127894, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.4946406341236379, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.4719975064311173, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.41182432358851845, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.4034715718148006, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.3693186725771347, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.36304188784855995, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.3692675983091899, - "sentence_nr": 26 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.37402683054534963, - "sentence_nr": 26 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7645786047678913, - "sentence_nr": 27 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8655501219338723, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.8643729226327672, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9215030582508996, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.8643729226327672, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9215030582508996, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7858164289172753, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8872272977237059, - "sentence_nr": 27 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.8643729226327672, - "sentence_nr": 27 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.9215030582508996, - "sentence_nr": 27 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6898913050782208, - "sentence_nr": 27 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8620687741940413, - "sentence_nr": 27 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6898913050782208, - "sentence_nr": 27 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8528837782425732, - "sentence_nr": 27 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7708719635370461, - "sentence_nr": 27 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8793197587693242, - "sentence_nr": 27 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7708719635370461, - "sentence_nr": 27 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.888538633093067, - "sentence_nr": 27 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6840689169974626, - "sentence_nr": 27 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8314419144081646, - "sentence_nr": 27 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.5819799380263497, - "sentence_nr": 27 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.7407958979814505, - "sentence_nr": 27 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7645786047678913, - "sentence_nr": 27 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8655501219338723, - "sentence_nr": 27 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.41098733201100757, - "sentence_nr": 27 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.651283133493195, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6152755816095169, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7669297251133314, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4250002996145258, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6670552714553488, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3735617779670567, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5773479111816255, - "sentence_nr": 27 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5543498698280007, - "sentence_nr": 27 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7266847297604082, - "sentence_nr": 27 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3716332023564544, - "sentence_nr": 27 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.6132388888021502, - "sentence_nr": 27 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6986939462620247, - "sentence_nr": 27 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.8497711598086016, - "sentence_nr": 27 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5072570733389083, - "sentence_nr": 27 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7124868368374351, - "sentence_nr": 27 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5907596734005102, - "sentence_nr": 27 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7837270250239556, - "sentence_nr": 27 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.10008881112800158, - "sentence_nr": 27 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.29125356488795046, - "sentence_nr": 27 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.0, - "sentence_nr": 27 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.041649157343430596, - "sentence_nr": 27 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.6587480145435196, - "sentence_nr": 27 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.7917841426705801, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.7446828000198126, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.885521980076414, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.6466833757622275, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.7737914417145209, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.4447278656331358, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.6742569711624775, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.600047216971444, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.7511423755179258, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.3382340617900419, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.6182585373365673, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.6069548573053054, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.7630436854704967, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.40482952759410495, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.6241130944295542, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.5021718181363274, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.697189669759932, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.7858164289172753, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.8717639062922423, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.5731680012014568, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.746935173521359, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.7224037170215811, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.8452672523905139, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.5724496367057007, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.7350859720106757, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.42250552136302394, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.6425389837629188, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.7645048342610411, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.876234192352485, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.45751787171307623, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.6647794363792763, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.7623067286250759, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.8682092620191191, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.46189821859121283, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.6442319235751083, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.3931991982536581, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.6422735790483707, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.44644290381704027, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.6892051604181435, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.4000177797533498, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.645169701736652, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.4479818542603719, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.6761961025641056, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.7123871749204508, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.8331784519293958, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.5749089871602278, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.7211428196508521, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.38506289173931413, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.6152360906748179, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.6231488481063673, - "sentence_nr": 27 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.7734960210241439, - "sentence_nr": 27 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4460616097899727, - "sentence_nr": 28 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6833569517560225, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4460616097899727, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6833569517560225, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4460616097899727, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.693261298341864, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4460616097899727, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.693261298341864, - "sentence_nr": 28 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6745016003476486, - "sentence_nr": 28 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8511670783317596, - "sentence_nr": 28 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.32329508170352383, - "sentence_nr": 28 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6141330847741713, - "sentence_nr": 28 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.4460616097899727, - "sentence_nr": 28 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6833569517560225, - "sentence_nr": 28 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.3837983925863447, - "sentence_nr": 28 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.6366757448341102, - "sentence_nr": 28 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6745016003476486, - "sentence_nr": 28 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8511670783317596, - "sentence_nr": 28 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 28 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 28 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6667025833042813, - "sentence_nr": 28 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.746973053424487, - "sentence_nr": 28 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.6745016003476486, - "sentence_nr": 28 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8511670783317596, - "sentence_nr": 28 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.217554942150074, - "sentence_nr": 28 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4859163400220353, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3008656294855478, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5198655773563042, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3008656294855478, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5198655773563042, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.203264842568494, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4965705242699611, - "sentence_nr": 28 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.32079058840140134, - "sentence_nr": 28 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5094305382960898, - "sentence_nr": 28 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.23693055763743093, - "sentence_nr": 28 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4968400811224627, - "sentence_nr": 28 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.32079058840140134, - "sentence_nr": 28 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5094305382960898, - "sentence_nr": 28 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.35479105265934485, - "sentence_nr": 28 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.4725761870926308, - "sentence_nr": 28 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.3301899334885226, - "sentence_nr": 28 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5632801217523468, - "sentence_nr": 28 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 28 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 28 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.1923904871441659, - "sentence_nr": 28 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5825915593253297, - "sentence_nr": 28 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.32079058840140134, - "sentence_nr": 28 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5094305382960898, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.4892199210635081, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.6263002679299042, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.09147827112247602, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.3360691966057836, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.2966218714191134, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.5348497180679597, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.3008656294855478, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.5209701084013916, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.3254074668234594, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.540582703782851, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.22935466869603194, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.6357138961264384, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.4460616097899727, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.6833569517560225, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.38769943713308697, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.6179897670313796, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.35964066074252593, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.5418421848087059, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.31666472263798334, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.5096984883597744, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.2656621439255861, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.47187800221660153, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.41583634222861793, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.6558319092753532, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.26633048164380024, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.5038200170930055, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.5371525807924681, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.7677378485184402, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.15274299622833287, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.4692950277268683, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.30626101600123445, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.583891679561264, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.18137691349228668, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.4586072719105437, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.23443677523946913, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.5163278972706644, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.33876931708826047, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.550413577565279, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.14207405313947058, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.47874702297210975, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.2539342198718324, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.46375067718601715, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.200726550812963, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.41645295439394076, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.42995245074388394, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.6515566568079457, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.2834052290575623, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.4974109921343301, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.19454290935168927, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.49909763892228687, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.3837983925863447, - "sentence_nr": 28 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.6379993550810827, - "sentence_nr": 28 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.1481394578697113, - "sentence_nr": 29 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.30063818852404856, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.14216645907653844, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.2737034564138708, - "sentence_nr": 29 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7778111223054219, - "sentence_nr": 29 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8190064480412373, - "sentence_nr": 29 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7778111223054219, - "sentence_nr": 29 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8190064480412373, - "sentence_nr": 29 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7778111223054219, - "sentence_nr": 29 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8190064480412373, - "sentence_nr": 29 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.14939354788683526, - "sentence_nr": 29 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.29041654772860626, - "sentence_nr": 29 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7778111223054219, - "sentence_nr": 29 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8190064480412373, - "sentence_nr": 29 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0, - "sentence_nr": 29 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 1.0, - "sentence_nr": 29 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "bleu", - "score": 0.7778111223054219, - "sentence_nr": 29 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "translation", - "metric": "chrf", - "score": 0.8190064480412373, - "sentence_nr": 29 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4125519163596689, - "sentence_nr": 29 - }, - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5539867049403877, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5420662441541858, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5445089463670787, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.40919282596076484, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5542936932152527, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4125519163596689, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5539867049403877, - "sentence_nr": 29 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4125519163596689, - "sentence_nr": 29 - }, - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5539867049403877, - "sentence_nr": 29 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5928902071159559, - "sentence_nr": 29 - }, - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.647817438132439, - "sentence_nr": 29 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.5928902071159559, - "sentence_nr": 29 - }, - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.647817438132439, - "sentence_nr": 29 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4125519163596689, - "sentence_nr": 29 - }, - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5539867049403877, - "sentence_nr": 29 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4125519163596689, - "sentence_nr": 29 - }, - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5539867049403877, - "sentence_nr": 29 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.34641959937802264, - "sentence_nr": 29 - }, - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.47549559716182727, - "sentence_nr": 29 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4125519163596689, - "sentence_nr": 29 - }, - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5539867049403877, - "sentence_nr": 29 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "bleu", - "score": 0.4125519163596689, - "sentence_nr": 29 - }, - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "translation", - "metric": "chrf", - "score": 0.5539867049403877, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "bleu", - "score": 0.42461633178803443, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "translation", - "metric": "chrf", - "score": 0.5603699277937889, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "bleu", - "score": 0.42254876310519374, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "translation", - "metric": "chrf", - "score": 0.5561399558171133, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "bleu", - "score": 0.2340216139262901, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "translation", - "metric": "chrf", - "score": 0.4522093023662336, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "bleu", - "score": 0.4132352454218328, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "translation", - "metric": "chrf", - "score": 0.5544725906870476, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "bleu", - "score": 0.42254876310519374, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "translation", - "metric": "chrf", - "score": 0.5561399558171133, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "bleu", - "score": 0.3951500216160541, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "translation", - "metric": "chrf", - "score": 0.6089660957340174, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "bleu", - "score": 0.42254876310519374, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "translation", - "metric": "chrf", - "score": 0.5561399558171133, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "bleu", - "score": 0.42282359171428024, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "translation", - "metric": "chrf", - "score": 0.5395092365663595, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "bleu", - "score": 0.35412968165085734, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "translation", - "metric": "chrf", - "score": 0.4985795126785612, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "bleu", - "score": 0.1598921499894403, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "translation", - "metric": "chrf", - "score": 0.390187618292215, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "bleu", - "score": 0.2400540439585043, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "translation", - "metric": "chrf", - "score": 0.49297433772099697, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "bleu", - "score": 0.4806604068305994, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "translation", - "metric": "chrf", - "score": 0.664228268001068, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "bleu", - "score": 0.42254876310519374, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "translation", - "metric": "chrf", - "score": 0.5561399558171133, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "bleu", - "score": 0.2340216139262901, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "translation", - "metric": "chrf", - "score": 0.45184273575809186, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "bleu", - "score": 0.7778111223054219, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "translation", - "metric": "chrf", - "score": 0.8190064480412373, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "bleu", - "score": 0.16533113836624475, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "translation", - "metric": "chrf", - "score": 0.4074791764578974, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "bleu", - "score": 0.28547397706062927, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "translation", - "metric": "chrf", - "score": 0.4838477808123968, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "bleu", - "score": 0.6053011982655683, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "translation", - "metric": "chrf", - "score": 0.652613765735072, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "bleu", - "score": 0.4229247984636106, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "translation", - "metric": "chrf", - "score": 0.556465536088555, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "bleu", - "score": 0.42254876310519374, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "translation", - "metric": "chrf", - "score": 0.5561399558171133, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "bleu", - "score": 0.3471790743028735, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "translation", - "metric": "chrf", - "score": 0.4458106286047354, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "bleu", - "score": 0.3555508425572384, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "translation", - "metric": "chrf", - "score": 0.5387745992013905, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "bleu", - "score": 0.1709686260975486, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "translation", - "metric": "chrf", - "score": 0.3940091304204109, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "bleu", - "score": 0.25958657290343434, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "translation", - "metric": "chrf", - "score": 0.43162699627918094, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "bleu", - "score": 0.2213908395073965, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "translation", - "metric": "chrf", - "score": 0.4213527844474163, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "bleu", - "score": 0.39696685122270786, - "sentence_nr": 29 - }, - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "translation", - "metric": "chrf", - "score": 0.5497060467823045, - "sentence_nr": 29 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 0 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 1 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 2 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 3 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 4 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 5 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 6 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 6 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 7 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 8 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 9 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 9 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 9 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 9 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 10 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 11 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 12 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 12 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 12 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 12 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 12 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 12 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 12 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 13 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 13 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 13 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 14 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 14 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 14 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 14 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 14 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 15 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 16 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 16 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 16 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 16 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 16 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 17 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 17 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 17 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 18 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 18 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 18 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 18 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 19 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 20 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 20 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 20 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 20 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 20 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 21 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 22 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 23 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 23 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 23 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 24 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 24 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 24 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 25 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 26 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 26 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 26 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 26 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 26 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 27 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 27 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 27 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 27 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 27 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 27 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 27 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 28 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 28 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 28 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "classification", - "metric": "accuracy", - "score": 1, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "classification", - "metric": "accuracy", - "score": 0, - "sentence_nr": 29 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9411583614202783, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9389202454786235, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8775848642818888, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8618703443763697, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7861888156926622, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7987489460131649, - "sentence_nr": 0 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9319748402595084, - "sentence_nr": 0 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7613425680699503, - "sentence_nr": 0 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8493237569441244, - "sentence_nr": 0 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9020031517329425, - "sentence_nr": 0 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8493237569441244, - "sentence_nr": 0 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.883570112979728, - "sentence_nr": 0 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8263460336753243, - "sentence_nr": 0 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8060322164809728, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8980680846396624, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9491059403137463, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9664300701360793, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9457224261353452, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9708225134054753, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9419324607589119, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9619002332717353, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9189927159116271, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.895905738615658, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8719916488298841, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9397108105925289, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.884345665982421, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 0 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9584454525436005, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9643081480127652, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9067144042813564, - "sentence_nr": 1 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8781616442886918, - "sentence_nr": 1 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9745733081082687, - "sentence_nr": 1 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9237743711831492, - "sentence_nr": 1 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9659571253320222, - "sentence_nr": 1 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9044755244774213, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9016506657203592, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9259203238585231, - "sentence_nr": 1 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9226314544302758, - "sentence_nr": 1 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6237003645369218, - "sentence_nr": 1 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.919365977563579, - "sentence_nr": 1 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9113270242697518, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.898943894327586, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9736119227904283, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9415432301630186, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.973004167300919, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9617726716367615, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8788632576179716, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9442690941930104, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9167527970009353, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9264966822048945, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9760432643638268, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9290639912797567, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9451284616565533, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9571970948049097, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9428452278208271, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.924510998540744, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9354255661287414, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9038448099971822, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9290214610132344, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9359307328554756, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9462257677914746, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9685511109758306, - "sentence_nr": 1 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9466350739636148, - "sentence_nr": 1 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7785501063601203, - "sentence_nr": 2 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8677672451180615, - "sentence_nr": 2 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9519685270619841, - "sentence_nr": 2 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5524309559543085, - "sentence_nr": 2 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8979970994003059, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8979970994003059, - "sentence_nr": 2 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9598023304313453, - "sentence_nr": 2 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8678877090803476, - "sentence_nr": 2 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.3628854370408249, - "sentence_nr": 2 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8866932684030095, - "sentence_nr": 2 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7932574787392968, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8840632918991035, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9244224424282228, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7493760739956499, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9434070582654602, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8626111481890223, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9742381587466754, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9614829239512629, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9634058264556766, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.846746937646691, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9416090102549223, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9586487245465463, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8628736669093499, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8883148663773122, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.921000444185013, - "sentence_nr": 2 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 2 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.964284245003951, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.899852954654377, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5884852453065169, - "sentence_nr": 3 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8943359440390058, - "sentence_nr": 3 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6239646156236577, - "sentence_nr": 3 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8782485779028959, - "sentence_nr": 3 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9219735185328113, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8379214027434272, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9171135147465285, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8793006100154936, - "sentence_nr": 3 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6764135013792538, - "sentence_nr": 3 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8320911917964368, - "sentence_nr": 3 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8320911917964368, - "sentence_nr": 3 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 3 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9020259333664543, - "sentence_nr": 3 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8443316591536836, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9062739514559724, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9243814194896306, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9257122714800141, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9048929676970495, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9233238051356927, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8961117810241208, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9137011072166213, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9144918070375806, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9447475462972004, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9418568225974095, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8631885674989124, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9540570534869818, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9356691952085903, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8263666332486633, - "sentence_nr": 3 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9187937618702817, - "sentence_nr": 3 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6492261286778312, - "sentence_nr": 4 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.4782990117524071, - "sentence_nr": 4 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8728890059382535, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7924841060781368, - "sentence_nr": 4 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8728890059382535, - "sentence_nr": 4 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8085699807438939, - "sentence_nr": 4 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9309167160514913, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8335210974928002, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9407617520385465, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9009704508776215, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.886161550229872, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8864780713525466, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8619950335517561, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.877644990158928, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9473578431592224, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8989284887461744, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8982857165205713, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9421743042333945, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 4 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.909430339396572, - "sentence_nr": 5 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9114715597392106, - "sentence_nr": 5 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9221676855227006, - "sentence_nr": 5 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.903310364652346, - "sentence_nr": 5 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.43631872104818037, - "sentence_nr": 5 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.725100223395414, - "sentence_nr": 5 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8342041754812477, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7541096773855238, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9352893606252747, - "sentence_nr": 5 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7137044016250488, - "sentence_nr": 5 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8459329201101423, - "sentence_nr": 5 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9155785169978052, - "sentence_nr": 5 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.454243405917021, - "sentence_nr": 5 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.4367071875067552, - "sentence_nr": 5 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9053865214400596, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9344907300105301, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.941467473244312, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8759462570863868, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9116059567890715, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.95453015576562, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9271804273091313, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9494380676747487, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8907525765155897, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9420326057327402, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8729192735278123, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.840210783941434, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8830406923187026, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8705872791986208, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9129896861855028, - "sentence_nr": 5 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 5 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9775140091004713, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.931908394385036, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.958499216692883, - "sentence_nr": 6 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9787648208394673, - "sentence_nr": 6 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8848447424869419, - "sentence_nr": 6 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9476480635849643, - "sentence_nr": 6 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8420296194650692, - "sentence_nr": 6 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9645398026978572, - "sentence_nr": 6 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.976975965491712, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9575751193892209, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.917870378110458, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.917870378110458, - "sentence_nr": 6 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.922108923148009, - "sentence_nr": 6 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9367021384173281, - "sentence_nr": 6 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.917870378110458, - "sentence_nr": 6 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9136709169732016, - "sentence_nr": 6 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9031487241080103, - "sentence_nr": 6 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.922108923148009, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9717329164232313, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9810420842974353, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9296061535584738, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9548717794727779, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9723617284409432, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9433216405879152, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9253992588631311, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7833761650543694, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8958698547783525, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9659983030155975, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9368374793769542, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9292848975349729, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9705333075369675, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9560908971572966, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9288860917142431, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9402643484548583, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9303023646781129, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9076656012518489, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9272618174968876, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9630829363546703, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9437691960187881, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9233897890679653, - "sentence_nr": 6 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9217593594034571, - "sentence_nr": 6 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9429459010031568, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9680340601535599, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9444947592571505, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9524237679532525, - "sentence_nr": 7 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8521740000505951, - "sentence_nr": 7 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9147273981117778, - "sentence_nr": 7 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 7 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9353915284262971, - "sentence_nr": 7 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9575256886848735, - "sentence_nr": 7 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 7 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9544425909905248, - "sentence_nr": 7 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 7 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.887089742205764, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8937272463225717, - "sentence_nr": 7 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9221577416896909, - "sentence_nr": 7 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7360571605491374, - "sentence_nr": 7 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9424882191492142, - "sentence_nr": 7 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9158962896380519, - "sentence_nr": 7 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9016185053131788, - "sentence_nr": 7 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9519313199322048, - "sentence_nr": 7 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9002497361613263, - "sentence_nr": 7 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9424882191492142, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9404564646985731, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9525612663771642, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9416090102549223, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.946182450185975, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8827665860178672, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9242269657430007, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9209375409360453, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9453162319718537, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9354735336178899, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9650606723493668, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.937172702008466, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9037456319061896, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9527540439558733, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9777992945719618, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9483614149601093, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9630476322301069, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9090634311284931, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9592439701684463, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9352813563171796, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9578898822826803, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9349087092124988, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9553475775967099, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9426144990998162, - "sentence_nr": 7 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9455357310467346, - "sentence_nr": 7 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9950087915805451, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9950087915805451, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9359599516797827, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9950087915805451, - "sentence_nr": 8 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8529883661830301, - "sentence_nr": 8 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9313047211019367, - "sentence_nr": 8 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9311406569876187, - "sentence_nr": 8 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9392038901097501, - "sentence_nr": 8 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9504743930445531, - "sentence_nr": 8 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9950087915805451, - "sentence_nr": 8 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9283998656503502, - "sentence_nr": 8 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9934034758807603, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9131528589305679, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9917679206284817, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9566767123929576, - "sentence_nr": 8 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9359924521743563, - "sentence_nr": 8 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8893588081911743, - "sentence_nr": 8 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9583698738001583, - "sentence_nr": 8 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 8 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9934034758807603, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.946392812169666, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.911875333930421, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9169315433407361, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9541325707307038, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9648123726963476, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8370298547932784, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9334875203861144, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9413496332501932, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9956823103485622, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9457390517164731, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9671298665063969, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9336521523423332, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9502062892893858, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9333019767772176, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9037394051488277, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9283644587512466, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9237582925385585, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8995566191566017, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.896344147038989, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.09821094254330615, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9548273305811203, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9251737690567995, - "sentence_nr": 8 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9275689564213165, - "sentence_nr": 8 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9272442008199501, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9520060001290835, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9058859200742604, - "sentence_nr": 9 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8789724147701462, - "sentence_nr": 9 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9137645544850267, - "sentence_nr": 9 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8969027357279203, - "sentence_nr": 9 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9520060001290835, - "sentence_nr": 9 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9275374047069039, - "sentence_nr": 9 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8772309014828462, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9140052999897977, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.868350408637765, - "sentence_nr": 9 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7563541659131354, - "sentence_nr": 9 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8578315979157695, - "sentence_nr": 9 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8441075622700097, - "sentence_nr": 9 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.23829288001976573, - "sentence_nr": 9 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9407267756704489, - "sentence_nr": 9 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.831845583109951, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9530684796567226, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8984174935165463, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.946008414943598, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9285885624039975, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9645189965938258, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9601667560566091, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9113133701465544, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9363094557613988, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9499594621802195, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8850558582872771, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9413520522974334, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8953760832780698, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9516191368774216, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.873135905690596, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9331628274049639, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9350921637704382, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9398175409358328, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9675093986501344, - "sentence_nr": 9 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 9 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9282207391671503, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.957452925924953, - "sentence_nr": 10 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8937237551170429, - "sentence_nr": 10 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9256331955884847, - "sentence_nr": 10 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.904390835311888, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8995954000535624, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.928962868887516, - "sentence_nr": 10 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9339798045072082, - "sentence_nr": 10 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8008809042180175, - "sentence_nr": 10 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9240001424211951, - "sentence_nr": 10 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.3493344613894351, - "sentence_nr": 10 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.92829327413418, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9359271530286619, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9641555435524619, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.90719289051837, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8543701176038877, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9045960456690756, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9576659929734302, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9445842802137389, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.917893569547509, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9031282594956593, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9325823323160847, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9171277146973622, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9631220314707449, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9125575210703364, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9671298665063969, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8583796678495444, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.917870378110458, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9075511178990168, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8942877287874674, - "sentence_nr": 10 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 10 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8881782096383685, - "sentence_nr": 11 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8452994228892592, - "sentence_nr": 11 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.857664755026069, - "sentence_nr": 11 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7687402404428638, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9425182378610694, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8221659843346086, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8685375697135141, - "sentence_nr": 11 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7860944644568774, - "sentence_nr": 11 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7829829019188287, - "sentence_nr": 11 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9425182378610694, - "sentence_nr": 11 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.289269703803095, - "sentence_nr": 11 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7829829019188287, - "sentence_nr": 11 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9425182378610694, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.915813486906383, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.928671169616198, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9195852720074569, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9260563505342738, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8580715674095071, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8991782906832555, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9549429726485847, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8571447284090962, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.953599772014362, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9382091007325469, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9125682774652475, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9084959093441131, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9389584881035126, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8968120926569282, - "sentence_nr": 11 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8757339860702672, - "sentence_nr": 11 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9742989957563788, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9854564066904739, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.938338375356983, - "sentence_nr": 12 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9363458435045497, - "sentence_nr": 12 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9275189832478317, - "sentence_nr": 12 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9680610688075657, - "sentence_nr": 12 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9458276502828801, - "sentence_nr": 12 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9555270393882619, - "sentence_nr": 12 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.774972667720128, - "sentence_nr": 12 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9238483556315539, - "sentence_nr": 12 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 12 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9292605756517186, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8710905917506855, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8877998658561537, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9408832971568818, - "sentence_nr": 12 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8758560882945217, - "sentence_nr": 12 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9047504210526172, - "sentence_nr": 12 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9149458726191051, - "sentence_nr": 12 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9497380252636716, - "sentence_nr": 12 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9247145535687903, - "sentence_nr": 12 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 12 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8829314518141973, - "sentence_nr": 12 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9497380252636716, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9369900232316837, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9584772514045287, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9656526051593539, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9262800142753679, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9178799098053634, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8988056403515298, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9240902217687106, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9454713149117651, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9457650793019858, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9115531547253959, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9403725471773088, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9107758326980321, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9251111872988325, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9267004903727016, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9652440580136615, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.924254800539438, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9054967244578502, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.886673201587762, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9082204179924286, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9665046359304257, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9492870842156111, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 12 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9495327576081029, - "sentence_nr": 12 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9605742681789634, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9410712595774171, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.971921146040729, - "sentence_nr": 13 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8360964435901039, - "sentence_nr": 13 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9278436686065653, - "sentence_nr": 13 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9540941235545723, - "sentence_nr": 13 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7765803419515074, - "sentence_nr": 13 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9368660209060221, - "sentence_nr": 13 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9179315685239186, - "sentence_nr": 13 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9198867501155861, - "sentence_nr": 13 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9357668560693397, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.880651835588671, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9322025130978147, - "sentence_nr": 13 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8666701669384438, - "sentence_nr": 13 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9301584319196643, - "sentence_nr": 13 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9201441893603447, - "sentence_nr": 13 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.4518476286184633, - "sentence_nr": 13 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8817151383770689, - "sentence_nr": 13 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9719892276800867, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9232252378020026, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.90340499273861, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9445601279006905, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9284637794790105, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9506720475284802, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9650672132857259, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.935825271074837, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9417006532894496, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9180957642017807, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9336273124319283, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9199623581249377, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9420383150390214, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9169222881606529, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9358954768171188, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9210475526688618, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.900422383617428, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9665042848270522, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9195975724156285, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9482591669689567, - "sentence_nr": 13 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.894400898846725, - "sentence_nr": 13 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9683895601588671, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.974733551222386, - "sentence_nr": 14 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.935724475087967, - "sentence_nr": 14 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.945278116491169, - "sentence_nr": 14 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.913976993531483, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9206503738833902, - "sentence_nr": 14 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8879551150411227, - "sentence_nr": 14 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9093507960484853, - "sentence_nr": 14 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.44325871778061554, - "sentence_nr": 14 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8629899790604912, - "sentence_nr": 14 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8839868610728687, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9659019608247615, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9612040783142544, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9355702448711621, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8575724679460186, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.919154316989783, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9107041155041439, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8860042875765471, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9163443895096822, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9513360683724416, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9506442510575418, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9210869399305139, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8602965545640948, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8912610518101419, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.857937519719319, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9528771181894694, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9241995664234885, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9432104991415542, - "sentence_nr": 14 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8927784164557715, - "sentence_nr": 14 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8681309346882299, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9045257596276787, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7552111299277484, - "sentence_nr": 15 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.82396628763246, - "sentence_nr": 15 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 15 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8809116426093319, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9069369532463243, - "sentence_nr": 15 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8255413975339149, - "sentence_nr": 15 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9116712045344968, - "sentence_nr": 15 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8855094439275503, - "sentence_nr": 15 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8809116426093319, - "sentence_nr": 15 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5085021700346579, - "sentence_nr": 15 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8155954216287978, - "sentence_nr": 15 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8849766832597384, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9321985099431636, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9158869153954171, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8940299169999223, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9029209331114941, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9434784706316768, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9504499063681887, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8103402263404181, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9033542015144801, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8920851535963175, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9012698346023688, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8815241253287673, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.955434974676454, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9190034267575142, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9028341607528202, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7933760889502307, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9669111778196173, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9183552099282611, - "sentence_nr": 15 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9213964969470535, - "sentence_nr": 15 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9438561056375272, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9245427558640842, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9466217999433078, - "sentence_nr": 16 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8600910973378976, - "sentence_nr": 16 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5465479162881712, - "sentence_nr": 16 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.908088143295894, - "sentence_nr": 16 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8689979953554426, - "sentence_nr": 16 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8837997874830685, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9430526976186369, - "sentence_nr": 16 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7213258253735133, - "sentence_nr": 16 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8583796678495444, - "sentence_nr": 16 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5352913894873965, - "sentence_nr": 16 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7506613813658406, - "sentence_nr": 16 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9120029292560927, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.969258616291086, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9359933426460225, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8446197069920836, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9665537794677691, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7519024768911576, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9419599049218603, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9100379761498075, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9243062555931161, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9232535952320629, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9430158926147498, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8923268998495886, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9316958873367511, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9441083273271286, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.899852954654377, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9451690574618664, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9470556595464068, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8625414653847894, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8658510104009289, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.938651167013012, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9362303281043904, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 16 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9288883358178652, - "sentence_nr": 16 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7378741057437793, - "sentence_nr": 17 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.898904151376881, - "sentence_nr": 17 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8446522700991944, - "sentence_nr": 17 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9053865214400596, - "sentence_nr": 17 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8700885813654318, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9331139325257429, - "sentence_nr": 17 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8860497305091617, - "sentence_nr": 17 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8222704990602537, - "sentence_nr": 17 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8402559609277754, - "sentence_nr": 17 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7386088026745246, - "sentence_nr": 17 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.06557474419143802, - "sentence_nr": 17 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8700885813654318, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8771568927591851, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8869070241487921, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8173012945645394, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8220012279932035, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8449397341788647, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9239069749524619, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8972504357155736, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6602446784708298, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8667833154965509, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7306831212016971, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7306831212016971, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7406377967705062, - "sentence_nr": 17 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8509760908759664, - "sentence_nr": 17 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.96926930549605, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8641726957145408, - "sentence_nr": 18 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9637804258017773, - "sentence_nr": 18 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9240863542577373, - "sentence_nr": 18 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9450374119495017, - "sentence_nr": 18 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.96926930549605, - "sentence_nr": 18 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9424882191492142, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9424882191492142, - "sentence_nr": 18 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9392663489644577, - "sentence_nr": 18 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8389799674466019, - "sentence_nr": 18 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9253208187778743, - "sentence_nr": 18 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9221577416896909, - "sentence_nr": 18 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9221577416896909, - "sentence_nr": 18 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 18 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9144266092886102, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9500117624130617, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.905862662289465, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9003734503251455, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.858544407149412, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9281598514152588, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.948121913854874, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9629589146416885, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9580736862318411, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9708835294542548, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9234823141384267, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9939521304203686, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9474838221026617, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9488355997601815, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9424390135303181, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9711070259637357, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9237920416869381, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8942780008373756, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8468261925085733, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8976119317111001, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9527352893094178, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9510981354135275, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9521144628004171, - "sentence_nr": 18 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 18 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9774592733638915, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9665042848270522, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9806060444395596, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9049668032095894, - "sentence_nr": 19 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9665042848270522, - "sentence_nr": 19 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9667317239059525, - "sentence_nr": 19 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 19 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9159800198090925, - "sentence_nr": 19 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9667317239059525, - "sentence_nr": 19 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8925738398388144, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9058585844143391, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8888787903169728, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8925738398388144, - "sentence_nr": 19 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9128855680689195, - "sentence_nr": 19 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9272821491047395, - "sentence_nr": 19 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9413354408985303, - "sentence_nr": 19 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.46619006556188114, - "sentence_nr": 19 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.913896513382741, - "sentence_nr": 19 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9058585844143391, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9557922260754473, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9391656780027514, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9260113686541587, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9419307613884336, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9845996986850503, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9255228522887315, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.889174440461237, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9496761617043387, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9322360743819351, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.935492418630274, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9456325305487512, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9325466173278317, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9240800356922247, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9361690788124847, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.938043640398588, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.901373116210745, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9531605377803356, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9132591460407243, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9494481589794223, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9415361564397403, - "sentence_nr": 19 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.897450557161678, - "sentence_nr": 19 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.937002127196651, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9435408381256087, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9421449698305296, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9607456319189528, - "sentence_nr": 20 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5467617051776391, - "sentence_nr": 20 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8969209805167669, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9060555921929084, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8969209805167669, - "sentence_nr": 20 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9096430262961498, - "sentence_nr": 20 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7983940190154283, - "sentence_nr": 20 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9132591460407243, - "sentence_nr": 20 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9204057102575467, - "sentence_nr": 20 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.4968312722246179, - "sentence_nr": 20 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8001971757912975, - "sentence_nr": 20 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9204057102575467, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.95112146871187, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.949624286506194, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9588139991437585, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9236414681715879, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9453633691396565, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9278367059866518, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9302237306555959, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8441460025255829, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9470556595464068, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.951863030034636, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8944443568631728, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9082204179924286, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.90717359411325, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9014597856352894, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9250084453288043, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.95462554022758, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9318340131711181, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9736147802901586, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9182449217144187, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9400180064454685, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9255769217104873, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9309426923102619, - "sentence_nr": 20 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9002012094811458, - "sentence_nr": 20 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9690017425712892, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6924365679057801, - "sentence_nr": 21 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.804543317337012, - "sentence_nr": 21 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8385395593542468, - "sentence_nr": 21 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9515560914045473, - "sentence_nr": 21 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.854435717190483, - "sentence_nr": 21 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7570244995532351, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6676892344393273, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.873135905690596, - "sentence_nr": 21 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6885773376269438, - "sentence_nr": 21 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.46961217063286037, - "sentence_nr": 21 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8379214027434272, - "sentence_nr": 21 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7570244995532351, - "sentence_nr": 21 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.20981645725460496, - "sentence_nr": 21 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6659995521111991, - "sentence_nr": 21 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7570244995532351, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8388678282825207, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9180596829241628, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9166274634412449, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8626786769008709, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7991709881281639, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8119656541607598, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8872308158649556, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8914910756561332, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.927494511055529, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9528614248210486, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8523282278495175, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9297633204435644, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9278042759794851, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8998995790099074, - "sentence_nr": 21 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9302677881301988, - "sentence_nr": 21 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9522511234396616, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7585159184184324, - "sentence_nr": 22 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8922770448230282, - "sentence_nr": 22 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9126128133576369, - "sentence_nr": 22 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6245412677586388, - "sentence_nr": 22 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.868233862673363, - "sentence_nr": 22 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8852329532489643, - "sentence_nr": 22 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8378994642516495, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8775848642818888, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8493237569441244, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9448292727000915, - "sentence_nr": 22 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8555426729178464, - "sentence_nr": 22 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7558344174949267, - "sentence_nr": 22 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8497451239178159, - "sentence_nr": 22 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8493237569441244, - "sentence_nr": 22 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8199763712080639, - "sentence_nr": 22 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8206722459046871, - "sentence_nr": 22 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.883570112979728, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.914786293186172, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8845568645036501, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8937192042814042, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.901348698020278, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8382013802825361, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9219786709510569, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8767649499531999, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9094880423990607, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8719390074611821, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9349020382990011, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9272997117562144, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8962185446474815, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8930034245249151, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9271664513693498, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8936606750264663, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8803360259381345, - "sentence_nr": 22 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8680210960657176, - "sentence_nr": 22 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9699436870249787, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9699436870249787, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7931982206364059, - "sentence_nr": 23 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9699436870249787, - "sentence_nr": 23 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9438398456065387, - "sentence_nr": 23 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9281186022380125, - "sentence_nr": 23 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9699436870249787, - "sentence_nr": 23 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9184823166209557, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8839868610728687, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8884834862973964, - "sentence_nr": 23 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9300073119656489, - "sentence_nr": 23 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9381606131991436, - "sentence_nr": 23 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8839868610728687, - "sentence_nr": 23 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9300073119656489, - "sentence_nr": 23 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.4072337657555589, - "sentence_nr": 23 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9184823166209557, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9293646790023864, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9511392272878579, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9293879632586071, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9277950353049101, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8843378183459343, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8741633139531418, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9271525909282003, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9736840552120738, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9396084767892234, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9187563342696414, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8896752045577786, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9737097349915758, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9330058893011377, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9570066548501687, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9201684039669155, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9133901345922595, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9458636432813123, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.917857433142856, - "sentence_nr": 23 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9375412439691305, - "sentence_nr": 23 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9556267474396976, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9489054429933926, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9489054429933926, - "sentence_nr": 24 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8327628422929998, - "sentence_nr": 24 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9249365863966041, - "sentence_nr": 24 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.922528755167094, - "sentence_nr": 24 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9486938895906879, - "sentence_nr": 24 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 24 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8620685016584069, - "sentence_nr": 24 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9405916043682414, - "sentence_nr": 24 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9327915990783561, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.909738029095061, - "sentence_nr": 24 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.909738029095061, - "sentence_nr": 24 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8707492337114523, - "sentence_nr": 24 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.95883735444933, - "sentence_nr": 24 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9455007606735264, - "sentence_nr": 24 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9372630850025364, - "sentence_nr": 24 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5983897920478856, - "sentence_nr": 24 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9299762198228243, - "sentence_nr": 24 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9302303599426779, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9544609413449265, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9355306533611718, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9432457481338326, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9472285181144658, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.923828763793418, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9224761498105726, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9756278595118478, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9499594621802195, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9544238060448419, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9012364553153411, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8199585012210312, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9280048312907723, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9587462450914201, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8938919301593574, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9507758066685948, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9432005035367906, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9675203656708941, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9303385434730891, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9401106918306472, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9533532275954528, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9274629860503822, - "sentence_nr": 24 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8926908826740254, - "sentence_nr": 24 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6224897798032885, - "sentence_nr": 25 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7893575827661004, - "sentence_nr": 25 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 25 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9478696521177714, - "sentence_nr": 25 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7160421907140165, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6217685026572488, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.794919886900137, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8793006100154936, - "sentence_nr": 25 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5916523997385489, - "sentence_nr": 25 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.4849269488253923, - "sentence_nr": 25 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7462718113811923, - "sentence_nr": 25 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8083701726292805, - "sentence_nr": 25 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.722502153449955, - "sentence_nr": 25 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 25 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.5544920599877754, - "sentence_nr": 25 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6853792233736985, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9200538056807258, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9630774769374594, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9143443086107108, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9052744049140443, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9018850910676268, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9142574363760879, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9168431011517528, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9141901633008906, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9317477810881586, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9354759108346813, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9141453314674155, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9550191440621234, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8402328635525613, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.831845583109951, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9092382099397807, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9770044719642067, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9155318202784664, - "sentence_nr": 25 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 25 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8272309965382391, - "sentence_nr": 26 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7767725512278205, - "sentence_nr": 26 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9373981486656514, - "sentence_nr": 26 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9579023880929557, - "sentence_nr": 26 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9268329536813669, - "sentence_nr": 26 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.904428807825769, - "sentence_nr": 26 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9116613044583819, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9084279839455062, - "sentence_nr": 26 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8013174743750245, - "sentence_nr": 26 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.34811585804131506, - "sentence_nr": 26 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8912610518101419, - "sentence_nr": 26 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9084279608664247, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9342971539350323, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9618018909441389, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9221850850049388, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9621502301102783, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9207497282487874, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8817316559043479, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9595521389704431, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9604273088099046, - "sentence_nr": 26 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8832167531630292, - "sentence_nr": 26 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9618018909441389, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9338423795983638, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8069582822584229, - "sentence_nr": 27 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9432051372011929, - "sentence_nr": 27 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8875472267363329, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8538919155402751, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8601111478550084, - "sentence_nr": 27 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8274840531521687, - "sentence_nr": 27 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8737243337458652, - "sentence_nr": 27 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8875472267363329, - "sentence_nr": 27 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8213297311895551, - "sentence_nr": 27 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.3007622907436899, - "sentence_nr": 27 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.906379768806771, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8996352283472103, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8577239523880982, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9705288278234159, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9022302698191352, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9618116705103616, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9282902444420971, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9283062281157928, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9143841728614055, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9148205155364358, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9429357495928096, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8199038085123204, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9007500710615358, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9335504867261654, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8519148326217993, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9467340802817513, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8914166352994622, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8697448206881571, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9091527400737927, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9487286082082608, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9550331732946552, - "sentence_nr": 27 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 27 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9703747509928279, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9540941235545723, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9543144589160125, - "sentence_nr": 28 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.652649628941592, - "sentence_nr": 28 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9767775472269087, - "sentence_nr": 28 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 28 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9393628940364738, - "sentence_nr": 28 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9410712595774171, - "sentence_nr": 28 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.6444379795256558, - "sentence_nr": 28 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8964898605551818, - "sentence_nr": 28 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9263597385884417, - "sentence_nr": 28 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.899546929868499, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.899546929868499, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9256238040654331, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9256238040654331, - "sentence_nr": 28 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 28 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.824741266541094, - "sentence_nr": 28 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 28 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9256238040654331, - "sentence_nr": 28 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8283905649271065, - "sentence_nr": 28 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.0, - "sentence_nr": 28 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.881413837458117, - "sentence_nr": 28 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.899546929868499, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9437940294094723, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9269703177791706, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.944904344834561, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8479413107328494, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9715595760527852, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8595969327963556, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9538713542813556, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8348508116391393, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9572462820044535, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9375119517314923, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9493167367596885, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9344916654109876, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9849529115133767, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9275259780895282, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9276874028790393, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9473074618830379, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9460494618521745, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8923268998495886, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9455007606735264, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9526558782357073, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9338345156544289, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8995764072227389, - "sentence_nr": 28 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9505226544098013, - "sentence_nr": 28 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9630841609539229, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9451142647196181, - "sentence_nr": 29 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7510122845400926, - "sentence_nr": 29 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8090165300577936, - "sentence_nr": 29 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9543128468386116, - "sentence_nr": 29 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "en", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "openai/gpt-4o-mini", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.920197561569537, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.1-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8759929746436435, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3-70b-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "mistralai/mistral-small-24b-instruct-2501", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8935424392990651, - "sentence_nr": 29 - } - ], - [ - { - "model": "mistralai/mistral-nemo", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.7769676399488106, - "sentence_nr": 29 - } - ], - [ - { - "model": "google/gemini-2.0-flash-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8875472267363329, - "sentence_nr": 29 - } - ], - [ - { - "model": "google/gemini-2.0-flash-lite-001", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "google/gemma-3-27b-it", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "qwen/qwq-32b", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8664932988313133, - "sentence_nr": 29 - } - ], - [ - { - "model": "microsoft/phi-4-multimodal-instruct", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.906379768806771, - "sentence_nr": 29 - } - ], - [ - { - "model": "amazon/nova-micro-v1", - "bcp_47": "zh", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8001297194719582, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "hi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9048724843551281, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "es", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8947987168857687, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ar", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9489238765618674, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ur", - "task": "language_modeling", - "metric": "chrf", - "score": 0.945278116491169, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "bn", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pt", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "pa", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8989194854163256, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ru", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "sw", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9061728639858796, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "id", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9501419212325259, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "de", - "task": "language_modeling", - "metric": "chrf", - "score": 0.891206254843651, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ja", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9331628274049639, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "te", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "mr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "jv", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9046319474149982, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "vi", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9463095328863311, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ta", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9085828484030862, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fa", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "tr", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "yue", - "task": "language_modeling", - "metric": "chrf", - "score": 0.8856061163721227, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "ko", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "it", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "fil", - "task": "language_modeling", - "metric": "chrf", - "score": 0.9661878700572512, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "arz", - "task": "language_modeling", - "metric": "chrf", - "score": 1.0, - "sentence_nr": 29 - } - ], - [ - { - "model": "meta-llama/llama-3.3-70b-instruct", - "bcp_47": "gu", - "task": "language_modeling", - "metric": "chrf", - "score": 0.908669313428767, - "sentence_nr": 29 - } - ] + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5679608237702286, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.746881923400435, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4438455475739657, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6320800718582147, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5894973558751632, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7562097956860054, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3846086976522069, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5835344719191324, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4804215535486392, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6694735319785804, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2511517944602615, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4484633445384819, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5820808184424484, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.73788733854976, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5749603738163459, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7240488251574404, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5617561349997696, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7132694856647042, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2963216580569375, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5101500486835966, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.15317719477157257, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.38800976493585004, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6001453932849357, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.762029391170019, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.30676942927198475, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4968492831219663, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.32063971770635635, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5206258401513325, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.39086127104761287, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6239956806265569, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3020679767949182, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5246291817407542, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.29261990846502584, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5207965578474395, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.23343658187420896, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5188968707275573, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.2920008662633279, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.47119207959541226, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.2596939072050362, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4394574387008692, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4273817965049865, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6016204186733703, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.2777551012631926, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.49423240120783246, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.7964573357809173, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.8458636471716781, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.34633672321253084, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5378805625051344, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.3582301850807646, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5380305837807603, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.300740577257699, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.5272774705181614, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.3099603853356145, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5209233176748354, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.35580399268816465, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5392592206305507, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.39317381456022266, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6026058740561834, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.48930936408255293, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.699085629239476, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.3963410285961713, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.613166190285915, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.44294247711132617, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5915660675216782, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.3756985486608933, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.5991443770283833, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.5009456904181451, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.6893719644090858, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.18273944860385094, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.44261865187418153, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.2153742037697241, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.4581737688885401, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.3372953649368346, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.5482505380106469, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.28528905353056333, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.4885812318466243, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.2935204022158406, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.4867597973247361, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.2929684584911775, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5038324436049059, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.4034224234291925, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.5736798834726872, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.1077205146963877, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.428338145564396, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.22327767951697297, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4063556880747369, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.2572733200413211, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.4520014138562526, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.40311197004738203, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5788525108956781, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.45313578977486535, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.6160993561903745, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.2651736858432996, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.4491383344282561, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.34545319957597864, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.5727052860304503, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.15815751066481462, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5152611872266766, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12903696060775005, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.456225988032654, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.024459391267874976, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12351824822447692, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.46822754470803873, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3465147345201782, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.08516700886866406, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4091252890943268, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.19194937906573872, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5477665664300843, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4370196290761142, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.20669086265781264, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5076721272198604, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.17630490037560695, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.48116430160978857, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4122750002638689, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.15412719160788987, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5010353699512481, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12369892692249995, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.44549610902403686, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12560672881768975, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4969560260291519, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17077058518804336, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5022008374701596, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10784756064735967, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4427230465401631, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.06735571462439276, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.38102852892512806, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.42723260976616784, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1694466724647263, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4902502031746037, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3532931581623198, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.175396614619324, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.49736499605529066, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.15154395847232716, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.46053919348995803, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4041678259311437, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1290514243115152, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4766581477336301, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.08273178236238297, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.36399666460809255, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.12601482779921785, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.43595665254608706, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.40959087443621306, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.6348509381122925, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.08214106568089705, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3969463877642616, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0744904632040495, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.4111163205685468, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.12894104034845807, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.4486368934849452, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.10070927557742705, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.43718220262892105, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0772718393063023, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.4203683137304257, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0756907193511249, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4138725093679467, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.21748353646757182, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.4462746462826943, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4179644538349004, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.10505106462290037, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.4474870048911137, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.0009218289085545725, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.15653859793617866, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.43177798053127925, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0891537192318598, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.3970634926176537, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0950136506275681, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.4372017487229785, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.1259356760989446, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.44568274520971096, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.16322494183480127, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.4815584993817062, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0904087252785689, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.41830513174690515, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.21351902664706998, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.5130443042033361, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.16269986423611488, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.06939838145153245, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.3371547585108182, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.1691386174483793, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.4920789340026317, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.14944432524273302, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.4972796478830659, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.09793316925795417, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.4297577431879659, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.38870674200492367, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6484380084879691, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4923751299732868, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6853756490381199, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3996712647649035, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6353525755760105, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5115346945020283, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7037574715738644, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.017834618169115152, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.05927156798818119, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.23904922011090457, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3399292774084129, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6152980280400979, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8311281590297233, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.005449161724399305, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.026158029267484995, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.24508104771894088, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5725552336126134, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.20801258614305904, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.26703508536995574, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.35315040956049437, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.625895188503691, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.11133996756497437, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4410280353998367, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17743299460161885, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.43071271897416463, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.16052654068024738, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.41580120868053494, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.05963579607071745, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.31139762378406344, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.006734847287559362, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.03408121951468736, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.09880177230676102, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3297638349619511, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.2377604053257556, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5662768009060447, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.22573408807826306, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5444672928195973, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10742716472890976, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.42694859148910824, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.14745870033404418, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.475170637938921, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.21665407194210906, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.4344921442639243, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.37994652561206577, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6464467277069994, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.09362261118571368, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.3452056942265759, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.18917620656425485, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.4346170232980484, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.420450507904553, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.6503146347305717, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.24894072982768842, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5212235893093335, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.393613605227227, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6492198447661237, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.21147734744561483, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.41020178654369294, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.2329856851831642, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5405751250637106, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.41756686236967944, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5616829345739638, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.38189567401226293, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.6154314825900052, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.2126707920684064, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.4659908460634765, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.23240102389974368, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.4973274282641141, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.17979384730979156, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.4177311931467539, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.1702602472176709, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.4366640707779677, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.933651069586263, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.9586507529693243, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.3816408219023713, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.5784105768028126, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.18398226639192106, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.37285010531146734, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.26958884543190903, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.5631664732610485, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.4005296397635166, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.6201785376974677, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.15956483578595942, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.425693420655628, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.2323385180696658, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5019509292309764, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.22952177306405494, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5279520952576137, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.3618488169166299, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5708179622131996, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.1712766252338756, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.5225554962608486, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.2709079038456153, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.447458019441992, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.38249626297768063, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.40976234193505356, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5806197937310393, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7346706700987636, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5793367580502561, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6502428441722727, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4855332614117322, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5299556742893647, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.19940445989088915, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.43164821827950184, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2423441824135159, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4429509373913047, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6242817472465665, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7056438934239434, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6064630666233242, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6752055521830945, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5357110024227318, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6365941772753647, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.14790264259417688, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.27159767590045303, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4751132438608344, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6849386986272349, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.08635800047213174, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.218109371254876, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.23386786214190372, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3682311523733465, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.11739521786077453, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22090491782919655, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1892240568795935, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.280413108453108, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.11547518641061649, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.25945846414490087, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.20233074088759792, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3746629492952356, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.40214612768560637, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.45128424593135114, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.37284875432797243, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.44888401040760956, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0925329498915617, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2110486160692096, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.12453389344594705, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.141543757252386, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2594145364221844, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.6244631487487835, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.6931369519059803, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.25383339228798274, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.45896379476820603, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.17200767571780612, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.3723150838362789, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.1477219991186121, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.28685201698226354, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.3254455687469726, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.4474512036484817, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.23887527917609022, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.4120359948636439, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.580451128369423, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.728208634600343, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.3556521383601747, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.594830811413066, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.21629114799587432, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.3542320138389837, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.580451128369423, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.728208634600343, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.27405612859390877, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.4639958592456083, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.13004800471424346, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.28217142159025543, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.37821486365532614, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.4718665834023439, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.3699382260470039, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.4032851361478274, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.45167594566243024, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.5169677927619225, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.23386786214190372, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.3780009826926042, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.3925121365052661, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.47788592802001717, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.1423412184218882, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.2596718628394258, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.3572188192648703, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.45381175288762937, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.07425055521504613, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.18122341046764998, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.1978585723043446, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.3527599187160617, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.2523019529343173, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.4406369072888057, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.41072675483179805, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5635589150380774, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.3883375900135818, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.4643731845106876, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.6242817472465665, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.7123666275414222, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8003203203844999, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453478043428296, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2246029757863831, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5773502691896258, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7999099314029202, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6417603075499863, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7825422900366437, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8503171627677965, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.37709297891717664, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6881502501430368, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8003203203844999, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453478043428296, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5581982021478125, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.652013511062815, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.629039349740581, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5881561248602009, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.40435987083533204, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.39858613265631837, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.4425973012069069, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.47160616105623426, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5309982646782259, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.1892240568795935, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6151179643430991, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.41238100267720657, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.39909989628767284, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.17181529671327242, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5293474685884572, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4429196299668147, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3830425592586042, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.1667955161379731, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.5802683403568892, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.3237722713145643, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7426638026175545, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.49342175914364256, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.4352628824108997, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.5116862201536014, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.33471616336068044, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.2865612242047131, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6433813179203622, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.3598792258309727, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.5125809225356253, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5539920925426138, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5226572946586268, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5073395824633415, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.29382595610734974, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.1667955161379731, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.5773664661124461, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7013062757071812, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9303769449292738, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2381658499765768, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9063898435384111, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5309354663044072, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6756014232714684, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4529852871970908, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6941474239078328, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8522456714074852, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9096914044088521, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.9457416090031758, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9892952933418456, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5087473540251254, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7647955332172516, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5087473540251254, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7647955332172516, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4234885228074744, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7410180114887145, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47375069012411286, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7543919667018285, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5738396574789242, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.798357133373606, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5738396574789242, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.798357133373606, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47410002229034043, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7538467008030766, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4234885228074744, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7410180114887145, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47375069012411286, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7543919667018285, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.5091224918749461, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.7829685247145245, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.40276720463657734, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.6529271690805427, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.30188353873287377, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.6086565367747951, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.6026286934891149, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.8025775976044891, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.4596980088392874, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.713787745993602, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.3272712268138726, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.6272846474183881, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.2981792160679168, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5788026000794341, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.7012294787544179, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.8478115719875968, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.21690365808279138, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5384773678665918, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.25711386542134795, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.6088853751738869, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.5695988432761473, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.7516103467926585, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.5072784644062104, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7361065921505279, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.38091370416670794, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6438225861756911, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.5091224918749461, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7202697992734389, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.6917901740466924, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.8479928839177578, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.5309354663044072, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.6990707992725005, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.1673872929477023, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.4506667273103674, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.6917901740466924, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.8479928839177578, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.3416581331218724, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6578570934289981, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.4797543511401896, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7240781310560407, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.6401876410870359, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.7526484951226097, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.33057129676705455, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5669225664686625, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.6004981752197522, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7697646564917222, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.8492326635760689, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.9027320255916917, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.30350690419450826, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.569133886912883, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.6834516951654327, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3263040636562357, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5838790966762375, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3142665434344143, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6466526067220029, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3751840463233443, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6279894552667558, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.19268479640608693, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.551397074868541, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17470942957770763, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5403400891349619, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5763410052067085, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.37392149096896676, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6665214662145853, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5838790966762375, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.19464521962073492, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5763410052067085, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5460240376042262, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.24343304284910333, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6275577931282961, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.28571962561926445, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6431872581462166, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.6255340042200862, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.8724783049357475, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.48994561421713123, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.7411155087367244, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.4547900039222725, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.6541971428810075, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.23198210427894825, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.630711601223299, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.4831233610237384, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.7122562458056777, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.32263864160302524, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6824395076981005, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.38305978177479755, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.6061131723054572, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.24047860794644352, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.58198979036704, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.7511573912724299, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.9453473543978153, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.42984824697674956, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.7289444696770301, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.4881010344921759, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7317734491561229, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.5971070986250356, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.8874294965619517, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.3684981984538114, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.5606332518476288, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.4536404448264584, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.8020827133708689, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.4545091839935173, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.7166050399790445, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.3370129264673147, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.7096874943799061, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.1624355752882384, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.4952968469712617, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.4831233610237384, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.7807505267551733, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.5595205105615875, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.8322210048001876, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3142665434344143, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.6466526067220029, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.3610544299180199, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.49125115898082056, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.4284945090100314, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7164026439677106, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.8578928092681435, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.9422733087334002, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.42818224355402373, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.42105372680687736, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.7001171094008295, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1582866049832572, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.34487142413575794, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.15521606028436608, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.37645329404497957, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12620429887108936, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.35580703793872603, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12872220631084524, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.33602633953270183, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.042121062429802174, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.14281404499176092, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.042575418285137674, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.05173688961049459, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3045613775157565, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5275070803493389, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2734283774929853, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5252214120598302, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.10203846572325131, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.33381153680096753, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.014935758919429663, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.08106107745254391, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.044304867337633724, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.20806974344498103, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.08860973467526746, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3178004360288637, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.15268019045355535, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.41028757620299977, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.030860166165309233, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.1100250143829584, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.21255327712152144, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.43272151570555034, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.01486609147288197, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.13893773605583024, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.06609667473412645, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.26197209338359717, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.26064517697298795, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5092206110218525, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1507980395794452, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4306039128585424, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1438459189500836, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.30693371625402605, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0979038733644086, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.30211704738953993, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.009624974244068071, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.07318255686027669, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.043420474648595074, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2884095690753619, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.13868172938464635, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.3094469764260441, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.11091252683001185, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.26607634610445896, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.18154954789336694, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.4557483776072868, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.1381751568911733, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3121557499162649, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.20065115069964384, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.4084885616013531, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.12291219097556666, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3448002180666873, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.20608572305725564, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.4704943905570542, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.085416483900781, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.2825804066750608, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.11452508920842025, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.3212742401272785, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.15478222669012726, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.3550584759508654, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.07875433150726119, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.2638954513805452, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.10734088848154077, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.33946796348247366, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.17795920517030017, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.41862955401967455, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.19388048412249795, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.44361702376789247, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.1237012344369667, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.33331866832253354, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.15589802574348086, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.37894206802233305, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.1948502778967486, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.35525815981538433, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.1618333627385132, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.3458746996740858, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.17393111207515277, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.39042812195808824, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.19064689695123957, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.36954921822756504, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.1785851272602057, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3800733399524004, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.20113943179758872, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5054929215592371, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.07088281524771703, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.1725752257112697, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.11901413329120636, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.2908877283991857, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.15593857496482408, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3832822126692406, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.21107720643690867, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.43911506176829573, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.13410301071131794, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3942932268034351, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.30677064886592076, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5308555945242818, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1327526847508867, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.37850602486495205, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.18405035438430847, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4142901090120915, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.061826017721563604, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.08852681798207009, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3583179111355935, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3857436691295343, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5750224388123065, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5920893212447781, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6925021521158101, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.22478613858269392, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.44348101018104913, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.183687049781416, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.351911486970854, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5181825846579515, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17328174803055044, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3178268797869574, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.286608441075188, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4579283646292802, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.25861130592298187, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.39452644092432093, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.20379250618355427, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.41085414309816914, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.310679343206099, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4471183729584148, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2334787866969297, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3621517589760531, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5866873582151947, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.46269559069048716, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.46872641361415845, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10434360980785336, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3012789660952507, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.13835317113453516, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.16343842313572918, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3986641525285075, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.30890092021323623, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5553909583113487, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.1690979933029136, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.3751861276375209, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2840563956846642, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5110250591004448, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.19920413481788912, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.42537796926163113, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.20401796878756984, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.43317630453631556, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.2044887070217883, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.38471585132587544, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.2980504190448601, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5101268920225042, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.9436043261706615, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.9880191679951993, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.18831933500600306, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4318025704181776, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.21544027588567594, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5040038440508637, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.26970223719007375, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.5172978597562362, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.30630098078522544, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.5439056051092116, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.19850842371858787, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.43584341835040474, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.20170335119323748, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.3541251997977811, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.10508106635796587, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.3182774828667731, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.2309552734743087, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.43975656978777905, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.23530033724858213, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.46208607300298377, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.37284027455688556, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5528347504734102, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.2887308472548599, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.38846174119508314, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.15487293534817623, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.39293494862736383, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.21741853044139284, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3535910166292039, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.33626819961829335, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5466581859383387, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.32000331642122953, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5480591855923784, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.21132630077912357, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.4175670766052166, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.13108369255325433, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3929302741911199, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.20174045447955946, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.33729298835089516, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.20972571494011877, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.395894071208527, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.35369375385786006, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.13087682931309413, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.19462952976787054, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.013538497707846785, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1570208067577934, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4113045280468524, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.15082713742973322, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3965911699770542, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.15471428129658016, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4580211317461481, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.18928475425929295, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4916060435820526, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.21940429389247643, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4343280866601455, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1824401863423467, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.36709433185688595, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3377854698776805, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.521201229892482, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12475846123062707, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.27823340731817514, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10759927692349745, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.21065794536310511, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.07843772989359644, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.1324578891826276, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.08163977068875294, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.09047502044256338, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.21669141850731985, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10322985794794913, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.24491122482530842, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.11809057094812304, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.27930342777387007, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.21268444697113978, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3229997133764549, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1475503033983142, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22104108935973044, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.16434349396840395, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.28582614857210975, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10085167559661873, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.23831215045289575, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17543744527808774, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.28201016956553354, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.12274092982883021, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.3385513651938691, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.31017716089889963, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.21688283061839067, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.41775824162589076, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.12162779391619735, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3228288840559658, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.18237599479708327, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.3740403511567824, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.12876689524369925, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3253153379449275, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.119159749312327, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.21297942664093145, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.2036348471340078, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.3472831655579266, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.24362353508932386, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.28135849152758385, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.14482189302397735, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.2913876815877049, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.16306957103469613, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.9199349282509897, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.28112283847231073, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.1308613527030366, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.3063146286877558, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.12787395553510186, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.21931515993565381, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.1441966459257424, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.14957316612525498, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.27675048474641756, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.3780460244391623, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.12503614625842938, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.20624064341134082, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3368893372278425, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.2961559727627133, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.12846497020051437, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.2670865602673704, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.28252374116432993, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.3549531183419122, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.26128489301072644, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.2126837065505244, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.07149097424598219, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7774075575820374, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8943538262827356, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.18639667871924825, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4540232715517938, + "sentence_nr": 10 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8431643718744966, + "sentence_nr": 10 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9341410275694613, + "sentence_nr": 10 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47095916883357913, + "sentence_nr": 10 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.701526330557871, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.38260294162784475, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6692418584049541, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4093629115744712, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6243156092220487, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.36703839483583006, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6725357332891145, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4322450379367835, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.697398762810304, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.41122010762096617, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6697492221087861, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.41126318495820946, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7254294465493162, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4682601513034942, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.691130012325589, + "sentence_nr": 10 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.35334199245807973, + "sentence_nr": 10 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6858610070406853, + "sentence_nr": 10 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 10 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.33061666631099795, + "sentence_nr": 10 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5343307680770133, + "sentence_nr": 10 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.38981415389445495, + "sentence_nr": 10 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.665622189515994, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.3223937524276847, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.6719135382778884, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.4466645979681496, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.714247354760266, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.6233091888805312, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.7757111039890131, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.33414322499224436, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.7159580680193959, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.6620694102966999, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.7893416551805176, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.26540383860058264, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.51610805930355, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.49335830881778164, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.7240615166053675, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.4024279293206815, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.6798070651801875, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.6153147385756811, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.8160952378322835, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.6838493012537611, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.8178509424142287, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.5169198985488462, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.7879691803533485, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.5223010192696725, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7442134884509299, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.3885151883045163, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6763151870864087, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.5985488590218004, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.8248561222494313, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.37163791993879014, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.6792432753943116, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.5152630372775983, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.7696821316655393, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.43521980294891405, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.7204319998551938, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.46417187236805535, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.6653227698984816, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.519124054532681, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.7733428788002137, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.5083170211670072, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.755952798269267, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3161432307247198, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.5990810117425377, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.40980949787910764, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.7145653936496129, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.5770135999436572, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.7697316849447288, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.7030214416074754, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.8357829168322639, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.49199339399396913, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.713934780293142, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.5002824356846001, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.7029341279811726, + "sentence_nr": 10 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.029124970213905314, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1779610499753793, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.05989397907532586, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.13539167567510446, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.03073685498855941, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.08933758530290428, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.21051269871304829, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.18854722085547196, + "sentence_nr": 11 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1387123733773652, + "sentence_nr": 11 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.05499461839884487, + "sentence_nr": 11 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.19978068293555115, + "sentence_nr": 11 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.1388011701223677, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.1460389336009171, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.038796252164058714, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.1756002877791377, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0029868578255675027, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.015380253532528225, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.19065171436703615, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.21083781655774478, + "sentence_nr": 11 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.14590438247348272, + "sentence_nr": 11 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.04379419293412465, + "sentence_nr": 11 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.15119622228734425, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.21315318926996712, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.16991425356152365, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.22371589981083434, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.20982178138488494, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.20189358781069322, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.20261685251676126, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.226729844497646, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.18184342512086546, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.2185121523322681, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.17386106914161167, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.08272059515141832, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.1814025725787457, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.23945930551153607, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.20815933215961574, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.09886053260067004, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.14345644530149382, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.19097844728039898, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.08246021416977749, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.18868639139421345, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.20665565461558383, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.17764901410543646, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.19312651305380893, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.21371557282714232, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.18854043679878274, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.19559831357902827, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.1914895496057553, + "sentence_nr": 11 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6666935927206881, + "sentence_nr": 12 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7957561291403441, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.34999116613463505, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6356075517191035, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.48649824146709, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6763447333054696, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.367622917844187, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5615050712672139, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4081538556642202, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.46386216052527535, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4300174433641992, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5099800158255156, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7963205130973803, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8101688749569373, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6570128212612868, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6262090565616182, + "sentence_nr": 12 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5866943184579982, + "sentence_nr": 12 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6390393619950272, + "sentence_nr": 12 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 12 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 12 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 12 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.01047222192173988, + "sentence_nr": 12 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5683565265173782, + "sentence_nr": 12 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7072367582469653, + "sentence_nr": 12 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 12 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.20287366424876002, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5368464080033196, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5198707241967666, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6993305416237223, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.36603776814499195, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.45532918164901276, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.13525036115537795, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3120848453730729, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3474347870952493, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7073395735740273, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6577952971578602, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6570128212612868, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6221526807313811, + "sentence_nr": 12 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5460462259563637, + "sentence_nr": 12 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6641829079106271, + "sentence_nr": 12 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 12 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.04884431803904408, + "sentence_nr": 12 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 12 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.18357384275951122, + "sentence_nr": 12 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.28073304156067924, + "sentence_nr": 12 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.360657984953223, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.46365764298816153, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5757521453586436, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.3147715014841853, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5986154863155839, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.3885646234110734, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5051669760132699, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.464413403675355, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.6291656356697347, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.30490938758882236, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.579088460457721, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.3758073513458154, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.5302950018189692, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.29308025637967977, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5715200997140051, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.43285599641891276, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.5551678521355665, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.25984882476296983, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.6305744214119023, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.48649824146709, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.7255446918266525, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.464413403675355, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.6853183317800515, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.34999116613463505, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.6356075517191035, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.4426623526629488, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.612058732370435, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.5522004843736675, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.6166558670381421, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.37954187220913477, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.5550325994532472, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.3147715014841853, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.521228891025682, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.3964513253420688, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.6095420129111676, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.36033217429111203, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5550014071110869, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.33403925633579773, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.5753930328058733, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.44882520213790794, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.5856175239899348, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.42760828727369016, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.6065010489098535, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.33403925633579773, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5915394296427854, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.3212785834179169, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.6158121620368939, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.1751489536280261, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.378593296276962, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.3214110553053944, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.49232390716994445, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.479033905070678, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.5975149526416976, + "sentence_nr": 12 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.13150403915662862, + "sentence_nr": 13 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.21177549089429396, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1424915360855107, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.23985076149753726, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.13309638637723345, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.18696197122203645, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12256515595630638, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.23303109995893123, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1419886619859991, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.24113733359485448, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1324448705928064, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.22863839042697148, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12017886776600228, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.20794486026487116, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1164257728844972, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.19249901344360867, + "sentence_nr": 13 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12325384013681445, + "sentence_nr": 13 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1960232617116645, + "sentence_nr": 13 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12806473847444227, + "sentence_nr": 13 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.20054688779645718, + "sentence_nr": 13 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1345714227066951, + "sentence_nr": 13 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.21078968525268058, + "sentence_nr": 13 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1802615495980454, + "sentence_nr": 13 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.19630112442374525, + "sentence_nr": 13 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8212614342207556, + "sentence_nr": 13 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7876222308170935, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5888582552569348, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7876222308170935, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5888582552569348, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7876222308170935, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6871546336787117, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6871546336787117, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.43550490048931545, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6419345531187637, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17539593635425982, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3139104155809725, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.39225487001250453, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5189967318357492, + "sentence_nr": 13 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12859070457371286, + "sentence_nr": 13 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22162336097079333, + "sentence_nr": 13 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6871546336787117, + "sentence_nr": 13 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6258765997974801, + "sentence_nr": 13 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6680248455809015, + "sentence_nr": 13 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6258765997974801, + "sentence_nr": 13 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6722124517361844, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.17023327167529265, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.2521455524828544, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.13150403915662862, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.2229548791980166, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.15247670030930355, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.1324448705928064, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.23382021475411732, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.09766807787022613, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.16788063248730647, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.12111615182138995, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.21505717177216926, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.09979796185764318, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.1310501345458609, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.11512937599552589, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.1852451960926282, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.126642985054506, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.20913543330915318, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.12632059501697884, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.22490978846607526, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.1352612651586241, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.22176710342008016, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.18982400330057914, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.11760179026027952, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.19531596229980544, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.09968269909242322, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.14510210137368384, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.1204925245474865, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.12192273449574796, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.18177358407861108, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.16841504132177978, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.10667790151233097, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.17427579502643556, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.1508875367739971, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.20889434105456664, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.13184959768302618, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.30505662513933907, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.09878901581794378, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.1651800705978423, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.13150403915662862, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.20736628090200235, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.11824658049755846, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.2047497542808756, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.1461072488843534, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.1946917085815184, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.1018151014848322, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.14524830913329922, + "sentence_nr": 13 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2589080403198245, + "sentence_nr": 14 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2189767496390278, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.09761931247072746, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1397102655312677, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1326689502117876, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.167569694983793, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.15848968577272604, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.24447662789322752, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.20665940380705064, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.18243716955007858, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.16168125580314086, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2450013599045987, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.20901732384345645, + "sentence_nr": 14 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.20222677481313764, + "sentence_nr": 14 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.18492694642397273, + "sentence_nr": 14 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.18243716955007863, + "sentence_nr": 14 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.16667457585564618, + "sentence_nr": 14 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.08556679632324991, + "sentence_nr": 14 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1575852366903021, + "sentence_nr": 14 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1474874322154398, + "sentence_nr": 14 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9202663016973823, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9263876898254182, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8621431910551439, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8363304387269249, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9419492177147062, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9202237383102091, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6656058483395763, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6306557167105028, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8657947138469048, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8367521498141209, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9419492177147062, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9202237383102091, + "sentence_nr": 14 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6993348038140574, + "sentence_nr": 14 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6335836519040372, + "sentence_nr": 14 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9419492177147062, + "sentence_nr": 14 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9202237383102091, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.11064738383914807, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.12449466772796605, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.12222372495044852, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.12383047729216191, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.1392580908972882, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.1333265070823728, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.07717159074475938, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.09413026539458375, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.16807498532991816, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.16404257857373192, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.21005284223037346, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.1679703861465872, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.0951509584925814, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.12014553061064691, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.11737915185320068, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.10085050674562507, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.11377195287577829, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.1301681094143453, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.09455636771034115, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.11463120929696417, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.1544787887603271, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.1384236976807813, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.11488572123868507, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.1455973492295447, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.13735441291745387, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.20255423961944058, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.205408273869532, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.11470196605012067, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.0960438892364715, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.07184436307032757, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.20378989148152887, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.16337212771611656, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.09669863605676213, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.10886215421099144, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.18171364159867548, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.16245793974098002, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.07562263205281951, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.09819928715831736, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.1430606569063152, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.128073928655324, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.09526781380423786, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.18223449608285797, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.17127401148639734, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.09855718610544388, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.09669863605676213, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.11679541132562438, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.09643517424337235, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.1226126790254367, + "sentence_nr": 14 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3969253441303859, + "sentence_nr": 15 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.43277080710930865, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.26887073704667247, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2918476164856665, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5183146371291372, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5942793492554739, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.028864519535915668, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.13535086012687783, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.29687399422087424, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.002376388269368755, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.04574695485583133, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2703094106380642, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2982249908859, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.29313061087267483, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.30295384730328956, + "sentence_nr": 15 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.284911205299835, + "sentence_nr": 15 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.32067889250923776, + "sentence_nr": 15 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.29353055611145706, + "sentence_nr": 15 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3381266475327612, + "sentence_nr": 15 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.09910529437987022, + "sentence_nr": 15 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2511990291834263, + "sentence_nr": 15 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.295394335805579, + "sentence_nr": 15 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.362515947701148, + "sentence_nr": 15 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7243776840931383, + "sentence_nr": 15 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8980107630353439, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9325718821645923, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9490053815176721, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6653044831075519, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7986980418662383, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8504591592783618, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8980107630353439, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5950322600507224, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7090542316843602, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.44768974737795825, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.45520472994232203, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6509298345623671, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7962234681835563, + "sentence_nr": 15 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.41813929088914065, + "sentence_nr": 15 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4779008399806691, + "sentence_nr": 15 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7243776840931383, + "sentence_nr": 15 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8642805496461259, + "sentence_nr": 15 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9506885335787997, + "sentence_nr": 15 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9606382935593174, + "sentence_nr": 15 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8665175293126633, + "sentence_nr": 15 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8642805496461259, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.342569723746894, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.47156710056973744, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.2319934375578505, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.3367678538644817, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2261681529206079, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.2647144854968396, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.35554722872430145, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.38873710544604445, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.3720000272862786, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.44695658930348453, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.4118588818865406, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.48573453292579605, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.2998354233286452, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.41144215385645566, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.42142495511264777, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.49708063531780444, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.33296735510279596, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4176386300927819, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.32522259162581857, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.3572499606049779, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.3449668516380805, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4341194278942322, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.36161896085795575, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.5052818563161547, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.2798191316489921, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.31866179281073254, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.3170440263520106, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.45327673850268096, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.15538140800156827, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.22365453282977818, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.1352815632479558, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.2610624350708668, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.35907597395908514, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.433310273977633, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.33498522957587384, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.4529680464694055, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.30675389390381064, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.49190118767827684, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.13922661372145656, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.27553494979330584, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3515170550015674, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.37881852198491145, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.30950829536527374, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.3839157172568008, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.22141947821999777, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.3633108862011865, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.2957849631521743, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.2872269269040579, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.19474118932727338, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3257294949902081, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.26505727008662233, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.41342120940573923, + "sentence_nr": 15 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5489548889989204, + "sentence_nr": 16 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5292552311493306, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.43141660874998483, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4251732952639193, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.479859141564773, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.47978767796651084, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2751349202729036, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.311148395820729, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5435154526669127, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5395341377171525, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5777979902630328, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6331337405946555, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6121338866063298, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6222767269627676, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5440627210252523, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5801365308278273, + "sentence_nr": 16 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5097049681318312, + "sentence_nr": 16 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5622473457673939, + "sentence_nr": 16 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.42567378467735034, + "sentence_nr": 16 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.470165978205223, + "sentence_nr": 16 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.47594607773277786, + "sentence_nr": 16 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5363851621507516, + "sentence_nr": 16 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4533373633026252, + "sentence_nr": 16 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5042718376547173, + "sentence_nr": 16 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8509306641805077, + "sentence_nr": 16 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9162670716850285, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9682566771439106, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9779127328168863, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7040822331405046, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7673268835807536, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8509306641805077, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9162670716850285, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7639225615341296, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8135226479972402, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6729400620282456, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7157738382386983, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6736973998414632, + "sentence_nr": 16 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7157738382386983, + "sentence_nr": 16 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7640211005075139, + "sentence_nr": 16 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8179683170395244, + "sentence_nr": 16 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.8509306641805077, + "sentence_nr": 16 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9162670716850285, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.43141660874998483, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.45005622460103567, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.5269212212163125, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5528502361092263, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.6736973998414632, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.7157738382386983, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.33491174038847354, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3646077683106875, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.450293182440332, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.4822292034174927, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.19834633509680927, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.2712763621688402, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.546749262754264, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5830342194369027, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.2754139367364165, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.34665831783057166, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.42877544777223947, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.43803970127356867, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.43908893511874636, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.4785460996828672, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.5898466143484524, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.6611594562951559, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.44701416909786756, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.5245065297475329, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.31417347869916407, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.3530975487930333, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.6373258340947424, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.6437421244363288, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.4715455630189013, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.543275675805182, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.2807304798995431, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.3418543172008782, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.5397682182130759, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.5703951757357331, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.5446420954986508, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5662782206307382, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.3378721588486122, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.4362453299175689, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.49288474585647657, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.5578180330951528, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.36197274748300795, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.36134314178088084, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.17060055774694924, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.2566677182784047, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.5717883675148524, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.640780099960748, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.221071468018936, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.41620491059292214, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.4263215396273059, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.3711481893609263, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.4101392170618868, + "sentence_nr": 16 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7838756540325346, + "sentence_nr": 17 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8813081534414112, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6486802664285581, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8066891982024211, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7344798528986015, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8855631322316195, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6486802664285581, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8585894188661937, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7838756540325346, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8813081534414112, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8434569599214109, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9123500588239437, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7849324644314795, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8934780380564308, + "sentence_nr": 17 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7838756540325346, + "sentence_nr": 17 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8799941663695641, + "sentence_nr": 17 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 17 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 17 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6809354000776107, + "sentence_nr": 17 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8640242853252401, + "sentence_nr": 17 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7838756540325346, + "sentence_nr": 17 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8841725044915145, + "sentence_nr": 17 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.39503194300684213, + "sentence_nr": 17 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6916289318228928, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3094285625931604, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6328843883953666, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.30888995556875376, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6801864286113619, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5512199399393973, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.45862256824436665, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7660160731572102, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.47770079267358434, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8053780976175922, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6259358824502687, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8067950339997761, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5296344689827603, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7183083787484315, + "sentence_nr": 17 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7568440125092788, + "sentence_nr": 17 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8347576899702969, + "sentence_nr": 17 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3001800600660342, + "sentence_nr": 17 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6794930944968381, + "sentence_nr": 17 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.18879642915927602, + "sentence_nr": 17 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6584653291380502, + "sentence_nr": 17 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4697979053121435, + "sentence_nr": 17 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7554660353280213, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.3164389365959547, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.7121929522648841, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.6031798395521694, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.7819677495994619, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.5646631238098637, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.836206348617966, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.36615107686578496, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.696074520676609, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.1543252261021413, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.4932064977882042, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.6966863379186454, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.7941296295595748, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.5487584440377526, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.8692797308530646, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.8787142254774354, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.944457825946867, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.5463887965663883, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.7033378749149323, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.4912217876159168, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.7991339910300419, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.7251215108320924, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.8334871013677937, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.587725019570444, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.7957550794048827, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.28856268147560865, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.6187787024786685, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.4402122771181734, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.7716344099519011, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.18465966669442654, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.503938463452404, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.17973438065210462, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.5509051817440759, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.4809103179432793, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.7499547288317748, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.6244070585346295, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.8433626077474702, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.43660156107563336, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7165816705519701, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3748533897614559, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.6863935447402433, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.3607442374649342, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.6876955247522804, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.3718491333506089, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.6941552634040441, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.5110976370499285, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.842915559657988, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.5591535564944223, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.8079980831297509, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.11809858631445573, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.5943886568930294, + "sentence_nr": 17 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1423170365140828, + "sentence_nr": 18 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.38605131339325, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3230989128220882, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.13860487750886114, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.36659667376085786, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.36295227908523897, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.13860487750886114, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.36118801210741663, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.40877861250593944, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.16673024281943524, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3975048254243706, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.11262865194228103, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.36030161445252334, + "sentence_nr": 18 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3066941236048102, + "sentence_nr": 18 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 18 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.409404483413751, + "sentence_nr": 18 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3629681915617596, + "sentence_nr": 18 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12162779391619735, + "sentence_nr": 18 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4136500403395244, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1909693288724605, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4115524982336727, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.14192760409508295, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3989311390496819, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.20304460086424203, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4966336271433132, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3935462418730863, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.33523829330170474, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3250861966671464, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3051626462022859, + "sentence_nr": 18 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.30944349609311117, + "sentence_nr": 18 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.11556522074454477, + "sentence_nr": 18 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.372688132616477, + "sentence_nr": 18 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.22392361812003433, + "sentence_nr": 18 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.460938469666163, + "sentence_nr": 18 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10704943109718215, + "sentence_nr": 18 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.362953271903766, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.14392660099814805, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.376362134090542, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.11718316363212337, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.3844506520287143, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.4024646900219184, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.08197539732074254, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.35287478964221025, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3502198678697797, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.376636825008991, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.30372034137078635, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.21481172921264619, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4009028477501074, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.15065778147399764, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.4580508275161034, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.21281360709834968, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.4292702902558381, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.13780534982274106, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3273034480518148, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.36078900962911326, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.2491467453273127, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.47986445165634506, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.10905122148101043, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.4502571446121065, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.274959074733397, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.3607206140473947, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.17796237395371306, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.48209511527864385, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.13644487773607678, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.36491236604183974, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.21850577875478958, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.4494281444270959, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.31361999490423276, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.1222354265296326, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.3727252294250617, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.1109484758001971, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.3612426584883393, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.20356858406857398, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.46358366365120834, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.11530762783711283, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.3781690117672006, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.11907182322580316, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.49599003474365394, + "sentence_nr": 18 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4220964985804286, + "sentence_nr": 19 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4455062898838481, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.32026140564476524, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4016870075045671, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.34697616124581016, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.40373943351486685, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4184617303786878, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4321132548050678, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3499900041521066, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3822330369569219, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4220964985804286, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4455062898838481, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.41428013900466737, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.425713879206717, + "sentence_nr": 19 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4184617303786878, + "sentence_nr": 19 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4321132548050678, + "sentence_nr": 19 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5102296603076779, + "sentence_nr": 19 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5412065437629714, + "sentence_nr": 19 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.32282559495424096, + "sentence_nr": 19 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.38266426308756574, + "sentence_nr": 19 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4230074457298372, + "sentence_nr": 19 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4432451111759523, + "sentence_nr": 19 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6363676859401174, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6744544901797789, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9271746317040298, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9736668125871423, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6986939462620247, + "sentence_nr": 19 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7821077250864037, + "sentence_nr": 19 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9184678024441792, + "sentence_nr": 19 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8884834862973964, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.3797391466432489, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.3481158447116987, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.31102805827817165, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.3375837027261476, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.19710660977672484, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.2646181750020499, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.3797391466432489, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.3274816319655301, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.28493958837889694, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.35876163607595707, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.2485364833746714, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.2873862688213756, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.41664461891968263, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.42600414573009276, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.2710684964643971, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.2982841390442802, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.23005567239800093, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.29184715566281483, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.2741455993358603, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.36403543443534025, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.34279101776553306, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.42600414573009276, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.30955822779938535, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.39546682876478195, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.39475108115635776, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.42154888635191134, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.2781617026804374, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.32302333182207527, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.08473168573832755, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.25650903369815853, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.2883871807684295, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.21660761852515356, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.25414220830184964, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.32910644083871465, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.29306886812256966, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.18084108219203518, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.27583433958197495, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.25612947694888455, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.3002607987321696, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3216291288446239, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4272249853925079, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.188590266789637, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.26177705380820604, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.3308736026652116, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.3875427536757155, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.28432597056103653, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.35944124408933287, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.23631465024334478, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.2692006325646732, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.259615032947222, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.2855780701161316, + "sentence_nr": 19 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.31343233007308363, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.28662182336952924, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.289946670354745, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2585958231966256, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1574562620502688, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2833933092608246, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2330649391612961, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2858508520944113, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.17248469309075373, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3673041887389201, + "sentence_nr": 20 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.28838937143148047, + "sentence_nr": 20 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.25480888745972646, + "sentence_nr": 20 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.14839290005301392, + "sentence_nr": 20 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.29565285341782266, + "sentence_nr": 20 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22266775943086, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10508106635796587, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2504422832248121, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22563365567811913, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12913533075470382, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.24776496881674256, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.08680476715745516, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22066482174709295, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12117880855911824, + "sentence_nr": 20 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.32137825349405363, + "sentence_nr": 20 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.20104685618767446, + "sentence_nr": 20 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.25137213099939626, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.28372673673489807, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.164799256779143, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.32187376249458133, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.2969522070783606, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.14440617372843148, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.27200704330334224, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.2442053369522631, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.33050427873462274, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.2133219421911448, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.3424665224706109, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.8944054777319608, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.24197054442617688, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.21682999057776514, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.3722897460532404, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.24424323100599224, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.2205591704292585, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.3479467223515336, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.1926917267834754, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.4545444680350158, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.17580772500133016, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.32957763052496886, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.2148084015365523, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.40974307981059804, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.29622141199363383, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.24146688269469918, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.09958408398703665, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.22890983822248492, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.20795712301883962, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.282761705091657, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.2551114536415265, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.18112053860965763, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.3266298821510716, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.1423412184218882, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.26467729752192487, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.286072901441292, + "sentence_nr": 20 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2851456053265138, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.09858834583812252, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7445389400758123, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9134769668037408, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2506297252541463, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8320381765431424, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9129044064886581, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.23443139907396643, + "sentence_nr": 21 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.29972668857564216, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12409597120849801, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2928237514438983, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.15083364266523736, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.38662429787924074, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22849324967229787, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.44152236347960977, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2989569143807341, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4042166909648807, + "sentence_nr": 21 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.3423939053207622, + "sentence_nr": 21 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.17611268473423294, + "sentence_nr": 21 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.20441543914149457, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.18928624746011372, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.43639616127375797, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.29213008358451265, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5828788445270403, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.14679869139754204, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.4021419566569229, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.329340597116918, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.6347143291802012, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.2868708266227936, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5779499593492363, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3436610762802303, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.2782087319667435, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.632418768195088, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.3083012995502152, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.6392851743718383, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.23050898626566632, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.48172150010681464, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.1969221590285716, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5644899370701738, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.32594818888335836, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.49646222671189383, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.4604008032403599, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.7444026788985108, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.29161716271402766, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.47302621872495865, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.6854823532900025, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.3546725638586892, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.21468316165048362, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.6851126041819388, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.250737833894674, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.40017617077306594, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.27204995504877727, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.2743963944428051, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.6341922683775969, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.7252122374710612, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.12586347848916266, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3554854950683664, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.3889045463729729, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.20229280648000492, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.6194717199605934, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.31114459650134146, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.11856660123276004, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.34601719602607445, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.40072710492884706, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.7206046648616748, + "sentence_nr": 21 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.22174147515312165, + "sentence_nr": 22 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2117279815687756, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.33999170096577974, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.29221353951377876, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3058731661111107, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2747352174231836, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.42736771185803385, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.39727964545172, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.10975022749274138, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.13904829787402162, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2873518361947954, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.24505805183333226, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.33495074569972355, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3454509072842772, + "sentence_nr": 22 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.43090467385890824, + "sentence_nr": 22 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3813511699401743, + "sentence_nr": 22 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 22 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.22765977642995502, + "sentence_nr": 22 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2247283208344801, + "sentence_nr": 22 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.30931906627981315, + "sentence_nr": 22 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2527893205238235, + "sentence_nr": 22 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9210500207490827, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9069369532463243, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4607778969984477, + "sentence_nr": 22 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8103868370118212, + "sentence_nr": 22 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4885014761119101, + "sentence_nr": 22 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.827819363745503, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.17903870455040152, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.18440575845606422, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.1981763713215807, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.2520139548059959, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.17499310607879404, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.18175908515502465, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.10089587713517954, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.11552870044063634, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.3168035112884022, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.30580678632835573, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.10825039887617824, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.1278708456868984, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.29705138694670025, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.2780223931578523, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.18986262747887736, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.18230825914917978, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.42442305789888696, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.42734795538422576, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.18781316135387768, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.16808430602651067, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.3454156644973841, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.30446460704247824, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.4479597674250984, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.41132840401983517, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.10704445941620296, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.13527356658034445, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.09941527806251362, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.13609735884978696, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.19230259308735756, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.22211286692050705, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.20383889880388334, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.17813562619757226, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.2986551380628858, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.30308773908860176, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.10536111661637193, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.13679626017050403, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.19732230687816163, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.22765162763479738, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.3987203877706927, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.39992851145514274, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.09467800236923245, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.11434380596647938, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.15034676904545285, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.36138016740101575, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.31224382417562974, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.345966570287759, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.2816115803298224, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.3461146475963348, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.30131374176129855, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.1552102601937674, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.1381803727119777, + "sentence_nr": 22 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4967067363118649, + "sentence_nr": 23 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6330776418175281, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.39501632817024007, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5629116515332234, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.44774758283371513, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6323151453499094, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3353166764160673, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5279751808070301, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3340392563357978, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5542299582982266, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2288355034549531, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.469883747317403, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5472915485853102, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7136367183558585, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6159995640523437, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8398584608765305, + "sentence_nr": 23 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5155625728615272, + "sentence_nr": 23 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6435263800797054, + "sentence_nr": 23 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 23 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 23 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 23 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.32206162101132135, + "sentence_nr": 23 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.24125880497129865, + "sentence_nr": 23 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.47825499190432214, + "sentence_nr": 23 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3292010361291119, + "sentence_nr": 23 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5670300297444607, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.46086624699736534, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6510894943437193, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5511532346688224, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7550305399541021, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.34537865578685034, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5956718372193373, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.39080227521872696, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.621048393466749, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.2755396296659942, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5033588333252278, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5795086255869999, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7183582779188291, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6214211316495574, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7844755306149331, + "sentence_nr": 23 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6008383045972477, + "sentence_nr": 23 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7291842011448325, + "sentence_nr": 23 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 23 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 23 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 23 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.25418196696822093, + "sentence_nr": 23 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 23 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5117784549266909, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.274941620352113, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.4651004879148919, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.22743363869750483, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5634710936922129, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.518836150464752, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.6242496691584447, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.5989032124636781, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.7291306908177887, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.2677353447271197, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.4460422364967209, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.3558785149067877, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.570837784052645, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.2624310277292268, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.4915471393606767, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.28489318277723963, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.6000278331909762, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.5728668995816387, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.7460634178179616, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.46086624699736534, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.6510894943437193, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.46507550803536196, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.6687857543858925, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.5155625728615272, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.6435263800797054, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.3639412530979476, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.654342605671994, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.19882981891203355, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.45714526865696425, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.32269274420690436, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.49704406859630557, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.42849655626964983, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.662646931303495, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.3937441173550755, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5600824723479425, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.23114663823833642, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.5107406700140826, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.419793811546288, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.6152785242440109, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.46086624699736534, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.6510894943437193, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.46507550803536196, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.6687857543858925, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.2296660762967038, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.5259172094145851, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.39501632817024007, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.5505822266189535, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.3215000448278979, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.5947774549102596, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.20870371467330825, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.40726160697608454, + "sentence_nr": 23 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3460579711860666, + "sentence_nr": 24 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.43910565102067395, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.16692770661327389, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2940239540182693, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.17589867762235817, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2991014535844428, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.15568794672327907, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.30284457998681635, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.17340302865304977, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.28581037214602456, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1782509297990519, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.28710039249342334, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4901491669500622, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5638035394617603, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3460579711860666, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4260473803699743, + "sentence_nr": 24 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.2011131382865372, + "sentence_nr": 24 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.36314253622836745, + "sentence_nr": 24 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.17598839092477797, + "sentence_nr": 24 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.28650792027744043, + "sentence_nr": 24 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 24 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.15997462319973554, + "sentence_nr": 24 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.24731742205813823, + "sentence_nr": 24 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3980108204104697, + "sentence_nr": 24 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5611872124508993, + "sentence_nr": 24 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7431443902355421, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4465866985385432, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6260699913485588, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4465866985385432, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6260699913485588, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.25509991414681377, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.505614827211273, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.21452424426866915, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.44780791445343104, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.23857086413632697, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.47971483823439903, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.446411600799131, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5816697577563045, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4664526119731094, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6399376431552989, + "sentence_nr": 24 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.20156032858716424, + "sentence_nr": 24 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4855075115512445, + "sentence_nr": 24 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1526900266679129, + "sentence_nr": 24 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.41716995830580594, + "sentence_nr": 24 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 24 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.23259933287371404, + "sentence_nr": 24 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.20835831728362864, + "sentence_nr": 24 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.49812931259693377, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.17334119484500185, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.31463785312250736, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.12522096513057643, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.335302418196347, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.9100527513271326, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.9584484214161733, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.20156032858716424, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.35007862377558696, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.3449632275226908, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5000457205552167, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.15568794672327907, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.30284457998681635, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.1529699053146309, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.35702516223197556, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.15975615838102766, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.16928451900289662, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.40173762794247314, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.15568794672327907, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.30284457998681635, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.16038844415635037, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.30359085570641314, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.3595283251171754, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.5790446318474887, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.20563705341552085, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.3762774944524412, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.16692770661327389, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.2940239540182693, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.14165832410287266, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.24107149684266257, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.1258646065963102, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.24857006332411635, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.2519649154562495, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.44974180175388206, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.3253958243003269, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.45173371737296786, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.27618177741751665, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.4305107132988055, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.1683625745315614, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.31167225759119427, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.13728361101885644, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3436250633828196, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.16353712933127018, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.32934735468962634, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.48680589893384085, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.6190257724123215, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.1551293035275564, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.2674082220133274, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.26091874007348304, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.17598839092477797, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.28650792027744043, + "sentence_nr": 24 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1332399603607437, + "sentence_nr": 25 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.19971937750838645, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1834283688193615, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.22588088032876846, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.12425342874478343, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1660533764831914, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.15538689193055893, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.14158209035366248, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1869416235999822, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0578819658044546, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.16170596160446446, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2206817446345091, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.14914968848461002, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.21702090583674813, + "sentence_nr": 25 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.138685682297543, + "sentence_nr": 25 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1258687317121735, + "sentence_nr": 25 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.1327332961698289, + "sentence_nr": 25 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.23556366957615363, + "sentence_nr": 25 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22894370639738668, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.16684195647378827, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.21420692177337528, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.33150414660895594, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.30808679013173407, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.23556366957615363, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.25521078373566897, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.14257880024595157, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.1979524022915653, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.14257880024595157, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.1979524022915653, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.260711748598298, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.28143225165615565, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.25621420675166556, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.32613185963061736, + "sentence_nr": 25 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.21310996044302127, + "sentence_nr": 25 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.2620829676028965, + "sentence_nr": 25 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.08892786873926031, + "sentence_nr": 25 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.14069122234920528, + "sentence_nr": 25 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.12273033502938982, + "sentence_nr": 25 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.15070376710164984, + "sentence_nr": 25 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.17376029392152273, + "sentence_nr": 25 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.22421987263715565, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.07369293827420972, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.08728042965046878, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.12416744870990627, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.06452498627127952, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.09758509152849626, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.09985298970743903, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.22158794642706012, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.20787168962643957, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.05401240601013853, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.07243671671799473, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.1543646468773244, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.09348998462584433, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.130990604448226, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.09885362316286796, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.15900429623613993, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.10903227170832805, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.11481934989482791, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.1745453831609756, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.046916282267844764, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.1250076305588977, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.12985392271660248, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.06737080019124615, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.18629057860741663, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.1504281768235603, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.11099491388125307, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.1201070010200949, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.08702826664587757, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.42262353460370816, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.3966051357904673, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.09612004569821603, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.10249207815381514, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.1341907303110576, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.11635402454082566, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.1636348970852316, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.06028131279303415, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.0901676620993871, + "sentence_nr": 25 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6983671476675032, + "sentence_nr": 26 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6697193437120026, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5809024483660724, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5409616569206442, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5893051076561628, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.555242666304663, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5197038614969076, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4944106522194635, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5863087308455573, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.5756247354842696, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.44763438063632005, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4327706284829231, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4562933372999328, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.4354000091116894, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.650945489442927, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6025447507087655, + "sentence_nr": 26 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5040260890269513, + "sentence_nr": 26 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.48159079549233025, + "sentence_nr": 26 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3966338449810425, + "sentence_nr": 26 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.3940867714969907, + "sentence_nr": 26 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3186669369694382, + "sentence_nr": 26 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.34867169182256896, + "sentence_nr": 26 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6350785093832516, + "sentence_nr": 26 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6188888500556722, + "sentence_nr": 26 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7997394936755756, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7811228513409922, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.9660854289024723, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.9613867167137871, + "sentence_nr": 26 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.7158159753911548, + "sentence_nr": 26 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7127947486849641, + "sentence_nr": 26 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6813410498464633, + "sentence_nr": 26 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6671821168913319, + "sentence_nr": 26 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.45066539224706753, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.4254592023616511, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.45779216736532874, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.40945502186629257, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.40071581088356767, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.36844216279073794, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.14609848125563302, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.18504017619904287, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.4184317523303411, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.40500270963162277, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.4125433652059801, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.3955923992862865, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.47182538941865537, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.42450279333172475, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.46492333059956836, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.4401112788616263, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.3967795858478363, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.3803134453035716, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.34915707707242977, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.34988691421168616, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.2613611691981996, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.2740054517113319, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.5600863252474344, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.5179797138258272, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.3461243385522883, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.3560268535895035, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.43650008892828823, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.42551924250056755, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.504580863725975, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.46703102558879955, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.36954961729302616, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.34760122558190465, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.3803026331533805, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.362200056491149, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.24777987943516128, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.2952194113831596, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.5258092834799059, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.4981801549352249, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.48625052891235754, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.4290939038872796, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.4045007320789693, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4098113348256027, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.44158642009003995, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.40903259597127894, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.4946406341236379, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.4719975064311173, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.41182432358851845, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.4034715718148006, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.3693186725771347, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.36304188784855995, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.3692675983091899, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.37402683054534963, + "sentence_nr": 26 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7645786047678913, + "sentence_nr": 27 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8655501219338723, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8643729226327672, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9215030582508996, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8643729226327672, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9215030582508996, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7858164289172753, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8872272977237059, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.8643729226327672, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.9215030582508996, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6898913050782208, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8620687741940413, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6898913050782208, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8528837782425732, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7708719635370461, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8793197587693242, + "sentence_nr": 27 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7708719635370461, + "sentence_nr": 27 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.888538633093067, + "sentence_nr": 27 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6840689169974626, + "sentence_nr": 27 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8314419144081646, + "sentence_nr": 27 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.5819799380263497, + "sentence_nr": 27 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.7407958979814505, + "sentence_nr": 27 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7645786047678913, + "sentence_nr": 27 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8655501219338723, + "sentence_nr": 27 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.41098733201100757, + "sentence_nr": 27 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.651283133493195, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6152755816095169, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7669297251133314, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4250002996145258, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6670552714553488, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3735617779670567, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5773479111816255, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5543498698280007, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7266847297604082, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3716332023564544, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.6132388888021502, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6986939462620247, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.8497711598086016, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5072570733389083, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7124868368374351, + "sentence_nr": 27 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5907596734005102, + "sentence_nr": 27 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7837270250239556, + "sentence_nr": 27 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.10008881112800158, + "sentence_nr": 27 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.29125356488795046, + "sentence_nr": 27 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.0, + "sentence_nr": 27 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.041649157343430596, + "sentence_nr": 27 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.6587480145435196, + "sentence_nr": 27 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.7917841426705801, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.7446828000198126, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.885521980076414, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.6466833757622275, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.7737914417145209, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.4447278656331358, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.6742569711624775, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.600047216971444, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.7511423755179258, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.3382340617900419, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.6182585373365673, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.6069548573053054, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.7630436854704967, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.40482952759410495, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.6241130944295542, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.5021718181363274, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.697189669759932, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.7858164289172753, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.8717639062922423, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.5731680012014568, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.746935173521359, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.7224037170215811, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.8452672523905139, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.5724496367057007, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.7350859720106757, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.42250552136302394, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.6425389837629188, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.7645048342610411, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.876234192352485, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.45751787171307623, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.6647794363792763, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.7623067286250759, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.8682092620191191, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.46189821859121283, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.6442319235751083, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.3931991982536581, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.6422735790483707, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.44644290381704027, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.6892051604181435, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.4000177797533498, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.645169701736652, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.4479818542603719, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.6761961025641056, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.7123871749204508, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.8331784519293958, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.5749089871602278, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.7211428196508521, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.38506289173931413, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.6152360906748179, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.6231488481063673, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.7734960210241439, + "sentence_nr": 27 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4460616097899727, + "sentence_nr": 28 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6833569517560225, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4460616097899727, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6833569517560225, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4460616097899727, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.693261298341864, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4460616097899727, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.693261298341864, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6745016003476486, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8511670783317596, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.32329508170352383, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6141330847741713, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.4460616097899727, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6833569517560225, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.3837983925863447, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.6366757448341102, + "sentence_nr": 28 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6745016003476486, + "sentence_nr": 28 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8511670783317596, + "sentence_nr": 28 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 28 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 28 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6667025833042813, + "sentence_nr": 28 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.746973053424487, + "sentence_nr": 28 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.6745016003476486, + "sentence_nr": 28 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8511670783317596, + "sentence_nr": 28 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.217554942150074, + "sentence_nr": 28 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4859163400220353, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3008656294855478, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5198655773563042, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3008656294855478, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5198655773563042, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.203264842568494, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4965705242699611, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.32079058840140134, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5094305382960898, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.23693055763743093, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4968400811224627, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.32079058840140134, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5094305382960898, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.35479105265934485, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.4725761870926308, + "sentence_nr": 28 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.3301899334885226, + "sentence_nr": 28 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5632801217523468, + "sentence_nr": 28 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 28 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 28 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.1923904871441659, + "sentence_nr": 28 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5825915593253297, + "sentence_nr": 28 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.32079058840140134, + "sentence_nr": 28 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5094305382960898, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.4892199210635081, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.6263002679299042, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.09147827112247602, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.3360691966057836, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2966218714191134, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.5348497180679597, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.3008656294855478, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.5209701084013916, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.3254074668234594, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.540582703782851, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.22935466869603194, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.6357138961264384, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.4460616097899727, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.6833569517560225, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.38769943713308697, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.6179897670313796, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.35964066074252593, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.5418421848087059, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.31666472263798334, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.5096984883597744, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.2656621439255861, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.47187800221660153, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.41583634222861793, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.6558319092753532, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.26633048164380024, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.5038200170930055, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.5371525807924681, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.7677378485184402, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.15274299622833287, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.4692950277268683, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.30626101600123445, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.583891679561264, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.18137691349228668, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.4586072719105437, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.23443677523946913, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.5163278972706644, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.33876931708826047, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.550413577565279, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.14207405313947058, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.47874702297210975, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.2539342198718324, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.46375067718601715, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.200726550812963, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.41645295439394076, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.42995245074388394, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.6515566568079457, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.2834052290575623, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.4974109921343301, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.19454290935168927, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.49909763892228687, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.3837983925863447, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.6379993550810827, + "sentence_nr": 28 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.1481394578697113, + "sentence_nr": 29 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.30063818852404856, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.14216645907653844, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.2737034564138708, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7778111223054219, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8190064480412373, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7778111223054219, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8190064480412373, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7778111223054219, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8190064480412373, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.14939354788683526, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.29041654772860626, + "sentence_nr": 29 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7778111223054219, + "sentence_nr": 29 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8190064480412373, + "sentence_nr": 29 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 29 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "bleu", + "score": 0.7778111223054219, + "sentence_nr": 29 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "translation", + "metric": "chrf", + "score": 0.8190064480412373, + "sentence_nr": 29 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4125519163596689, + "sentence_nr": 29 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5539867049403877, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5420662441541858, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5445089463670787, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.40919282596076484, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5542936932152527, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4125519163596689, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5539867049403877, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4125519163596689, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5539867049403877, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5928902071159559, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.647817438132439, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.5928902071159559, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.647817438132439, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4125519163596689, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5539867049403877, + "sentence_nr": 29 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4125519163596689, + "sentence_nr": 29 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5539867049403877, + "sentence_nr": 29 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.34641959937802264, + "sentence_nr": 29 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.47549559716182727, + "sentence_nr": 29 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4125519163596689, + "sentence_nr": 29 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5539867049403877, + "sentence_nr": 29 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "bleu", + "score": 0.4125519163596689, + "sentence_nr": 29 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "translation", + "metric": "chrf", + "score": 0.5539867049403877, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "bleu", + "score": 0.42461633178803443, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "translation", + "metric": "chrf", + "score": 0.5603699277937889, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "bleu", + "score": 0.42254876310519374, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "translation", + "metric": "chrf", + "score": 0.5561399558171133, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "bleu", + "score": 0.2340216139262901, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "translation", + "metric": "chrf", + "score": 0.4522093023662336, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "bleu", + "score": 0.4132352454218328, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "translation", + "metric": "chrf", + "score": 0.5544725906870476, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "bleu", + "score": 0.42254876310519374, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "translation", + "metric": "chrf", + "score": 0.5561399558171133, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "bleu", + "score": 0.3951500216160541, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "translation", + "metric": "chrf", + "score": 0.6089660957340174, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "bleu", + "score": 0.42254876310519374, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "translation", + "metric": "chrf", + "score": 0.5561399558171133, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "bleu", + "score": 0.42282359171428024, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "translation", + "metric": "chrf", + "score": 0.5395092365663595, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "bleu", + "score": 0.35412968165085734, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "translation", + "metric": "chrf", + "score": 0.4985795126785612, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "bleu", + "score": 0.1598921499894403, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "translation", + "metric": "chrf", + "score": 0.390187618292215, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "bleu", + "score": 0.2400540439585043, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "translation", + "metric": "chrf", + "score": 0.49297433772099697, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "bleu", + "score": 0.4806604068305994, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "translation", + "metric": "chrf", + "score": 0.664228268001068, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "bleu", + "score": 0.42254876310519374, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "translation", + "metric": "chrf", + "score": 0.5561399558171133, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "bleu", + "score": 0.2340216139262901, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "translation", + "metric": "chrf", + "score": 0.45184273575809186, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "bleu", + "score": 0.7778111223054219, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "translation", + "metric": "chrf", + "score": 0.8190064480412373, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "bleu", + "score": 0.16533113836624475, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "translation", + "metric": "chrf", + "score": 0.4074791764578974, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "bleu", + "score": 0.28547397706062927, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "translation", + "metric": "chrf", + "score": 0.4838477808123968, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "bleu", + "score": 0.6053011982655683, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "translation", + "metric": "chrf", + "score": 0.652613765735072, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "bleu", + "score": 0.4229247984636106, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "translation", + "metric": "chrf", + "score": 0.556465536088555, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "bleu", + "score": 0.42254876310519374, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "translation", + "metric": "chrf", + "score": 0.5561399558171133, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "bleu", + "score": 0.3471790743028735, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "translation", + "metric": "chrf", + "score": 0.4458106286047354, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "bleu", + "score": 0.3555508425572384, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "translation", + "metric": "chrf", + "score": 0.5387745992013905, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "bleu", + "score": 0.1709686260975486, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "translation", + "metric": "chrf", + "score": 0.3940091304204109, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "bleu", + "score": 0.25958657290343434, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "translation", + "metric": "chrf", + "score": 0.43162699627918094, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "bleu", + "score": 0.2213908395073965, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "translation", + "metric": "chrf", + "score": 0.4213527844474163, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "bleu", + "score": 0.39696685122270786, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "translation", + "metric": "chrf", + "score": 0.5497060467823045, + "sentence_nr": 29 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 10 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 11 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 12 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 13 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 13 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 14 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 14 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 15 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 16 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 16 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 16 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 16 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 16 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 17 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 17 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 17 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 18 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 18 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 18 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 18 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 19 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 20 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 21 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 22 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 23 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 23 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 23 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 24 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 24 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 24 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 25 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 26 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 27 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 27 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 27 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 27 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 28 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 28 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 28 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "classification", + "metric": "accuracy", + "score": 1, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "classification", + "metric": "accuracy", + "score": 0, + "sentence_nr": 29 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9411583614202783, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9389202454786235, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8775848642818888, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8618703443763697, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7861888156926622, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7987489460131649, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9319748402595084, + "sentence_nr": 0 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7613425680699503, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8493237569441244, + "sentence_nr": 0 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9020031517329425, + "sentence_nr": 0 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8493237569441244, + "sentence_nr": 0 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.883570112979728, + "sentence_nr": 0 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8263460336753243, + "sentence_nr": 0 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8060322164809728, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8980680846396624, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9491059403137463, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9664300701360793, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9457224261353452, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9708225134054753, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9419324607589119, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9619002332717353, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9189927159116271, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.895905738615658, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8719916488298841, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9397108105925289, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.884345665982421, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 0 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9584454525436005, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9643081480127652, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9067144042813564, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8781616442886918, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9745733081082687, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9237743711831492, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9659571253320222, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9044755244774213, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9016506657203592, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9259203238585231, + "sentence_nr": 1 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9226314544302758, + "sentence_nr": 1 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6237003645369218, + "sentence_nr": 1 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.919365977563579, + "sentence_nr": 1 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9113270242697518, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.898943894327586, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9736119227904283, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9415432301630186, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.973004167300919, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9617726716367615, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8788632576179716, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9442690941930104, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9167527970009353, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9264966822048945, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9760432643638268, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9290639912797567, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9451284616565533, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9571970948049097, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9428452278208271, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.924510998540744, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9354255661287414, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9038448099971822, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9290214610132344, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9359307328554756, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9462257677914746, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9685511109758306, + "sentence_nr": 1 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9466350739636148, + "sentence_nr": 1 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7785501063601203, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8677672451180615, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9519685270619841, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5524309559543085, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8979970994003059, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8979970994003059, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9598023304313453, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8678877090803476, + "sentence_nr": 2 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.3628854370408249, + "sentence_nr": 2 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8866932684030095, + "sentence_nr": 2 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7932574787392968, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8840632918991035, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9244224424282228, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7493760739956499, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9434070582654602, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8626111481890223, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9742381587466754, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9614829239512629, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9634058264556766, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.846746937646691, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9416090102549223, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9586487245465463, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8628736669093499, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8883148663773122, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.921000444185013, + "sentence_nr": 2 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 2 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.964284245003951, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.899852954654377, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5884852453065169, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8943359440390058, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6239646156236577, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8782485779028959, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9219735185328113, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8379214027434272, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9171135147465285, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8793006100154936, + "sentence_nr": 3 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6764135013792538, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8320911917964368, + "sentence_nr": 3 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8320911917964368, + "sentence_nr": 3 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 3 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9020259333664543, + "sentence_nr": 3 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8443316591536836, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9062739514559724, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9243814194896306, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9257122714800141, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9048929676970495, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9233238051356927, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8961117810241208, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9137011072166213, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9144918070375806, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9447475462972004, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9418568225974095, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8631885674989124, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9540570534869818, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9356691952085903, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8263666332486633, + "sentence_nr": 3 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9187937618702817, + "sentence_nr": 3 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6492261286778312, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.4782990117524071, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8728890059382535, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7924841060781368, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8728890059382535, + "sentence_nr": 4 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8085699807438939, + "sentence_nr": 4 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9309167160514913, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8335210974928002, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9407617520385465, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9009704508776215, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.886161550229872, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8864780713525466, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8619950335517561, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.877644990158928, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9473578431592224, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8989284887461744, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8982857165205713, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9421743042333945, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 4 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.909430339396572, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9114715597392106, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9221676855227006, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.903310364652346, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.43631872104818037, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.725100223395414, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8342041754812477, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7541096773855238, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9352893606252747, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7137044016250488, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8459329201101423, + "sentence_nr": 5 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9155785169978052, + "sentence_nr": 5 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.454243405917021, + "sentence_nr": 5 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.4367071875067552, + "sentence_nr": 5 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9053865214400596, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9344907300105301, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.941467473244312, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8759462570863868, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9116059567890715, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.95453015576562, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9271804273091313, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9494380676747487, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8907525765155897, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9420326057327402, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8729192735278123, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.840210783941434, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8830406923187026, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8705872791986208, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9129896861855028, + "sentence_nr": 5 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 5 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9775140091004713, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.931908394385036, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.958499216692883, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9787648208394673, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8848447424869419, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9476480635849643, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8420296194650692, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9645398026978572, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.976975965491712, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9575751193892209, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.917870378110458, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.917870378110458, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.922108923148009, + "sentence_nr": 6 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9367021384173281, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.917870378110458, + "sentence_nr": 6 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9136709169732016, + "sentence_nr": 6 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9031487241080103, + "sentence_nr": 6 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.922108923148009, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9717329164232313, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9810420842974353, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9296061535584738, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9548717794727779, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9723617284409432, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9433216405879152, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9253992588631311, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7833761650543694, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8958698547783525, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9659983030155975, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9368374793769542, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9292848975349729, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9705333075369675, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9560908971572966, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9288860917142431, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9402643484548583, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9303023646781129, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9076656012518489, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9272618174968876, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9630829363546703, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9437691960187881, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9233897890679653, + "sentence_nr": 6 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9217593594034571, + "sentence_nr": 6 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9429459010031568, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9680340601535599, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9444947592571505, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9524237679532525, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8521740000505951, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9147273981117778, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9353915284262971, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9575256886848735, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9544425909905248, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.887089742205764, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8937272463225717, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9221577416896909, + "sentence_nr": 7 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7360571605491374, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9424882191492142, + "sentence_nr": 7 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9158962896380519, + "sentence_nr": 7 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9016185053131788, + "sentence_nr": 7 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9519313199322048, + "sentence_nr": 7 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9002497361613263, + "sentence_nr": 7 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9424882191492142, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9404564646985731, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9525612663771642, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9416090102549223, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.946182450185975, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8827665860178672, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9242269657430007, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9209375409360453, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9453162319718537, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9354735336178899, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9650606723493668, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.937172702008466, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9037456319061896, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9527540439558733, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9777992945719618, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9483614149601093, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9630476322301069, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9090634311284931, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9592439701684463, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9352813563171796, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9578898822826803, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9349087092124988, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9553475775967099, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9426144990998162, + "sentence_nr": 7 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9455357310467346, + "sentence_nr": 7 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9950087915805451, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9950087915805451, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9359599516797827, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9950087915805451, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8529883661830301, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9313047211019367, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9311406569876187, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9392038901097501, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9504743930445531, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9950087915805451, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9283998656503502, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9934034758807603, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9131528589305679, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9917679206284817, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9566767123929576, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9359924521743563, + "sentence_nr": 8 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8893588081911743, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9583698738001583, + "sentence_nr": 8 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 8 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9934034758807603, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.946392812169666, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.911875333930421, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9169315433407361, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9541325707307038, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9648123726963476, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8370298547932784, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9334875203861144, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9413496332501932, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9956823103485622, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9457390517164731, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9671298665063969, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9336521523423332, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9502062892893858, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9333019767772176, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9037394051488277, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9283644587512466, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9237582925385585, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8995566191566017, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.896344147038989, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.09821094254330615, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9548273305811203, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9251737690567995, + "sentence_nr": 8 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9275689564213165, + "sentence_nr": 8 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9272442008199501, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9520060001290835, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9058859200742604, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8789724147701462, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9137645544850267, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8969027357279203, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9520060001290835, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9275374047069039, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8772309014828462, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9140052999897977, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.868350408637765, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7563541659131354, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8578315979157695, + "sentence_nr": 9 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8441075622700097, + "sentence_nr": 9 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.23829288001976573, + "sentence_nr": 9 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9407267756704489, + "sentence_nr": 9 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.831845583109951, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9530684796567226, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8984174935165463, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.946008414943598, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9285885624039975, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9645189965938258, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9601667560566091, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9113133701465544, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9363094557613988, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9499594621802195, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8850558582872771, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9413520522974334, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8953760832780698, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9516191368774216, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.873135905690596, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9331628274049639, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9350921637704382, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9398175409358328, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9675093986501344, + "sentence_nr": 9 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 9 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9282207391671503, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.957452925924953, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8937237551170429, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 10 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9256331955884847, + "sentence_nr": 10 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.904390835311888, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8995954000535624, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.928962868887516, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9339798045072082, + "sentence_nr": 10 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8008809042180175, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9240001424211951, + "sentence_nr": 10 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.3493344613894351, + "sentence_nr": 10 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.92829327413418, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9359271530286619, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9641555435524619, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.90719289051837, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8543701176038877, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9045960456690756, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9576659929734302, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9445842802137389, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.917893569547509, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9031282594956593, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9325823323160847, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9171277146973622, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9631220314707449, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9125575210703364, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9671298665063969, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8583796678495444, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.917870378110458, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9075511178990168, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8942877287874674, + "sentence_nr": 10 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 10 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8881782096383685, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8452994228892592, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.857664755026069, + "sentence_nr": 11 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7687402404428638, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9425182378610694, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8221659843346086, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8685375697135141, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7860944644568774, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7829829019188287, + "sentence_nr": 11 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9425182378610694, + "sentence_nr": 11 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.289269703803095, + "sentence_nr": 11 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7829829019188287, + "sentence_nr": 11 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9425182378610694, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.915813486906383, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.928671169616198, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9195852720074569, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9260563505342738, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8580715674095071, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8991782906832555, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9549429726485847, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8571447284090962, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.953599772014362, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9382091007325469, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9125682774652475, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9084959093441131, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9389584881035126, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8968120926569282, + "sentence_nr": 11 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8757339860702672, + "sentence_nr": 11 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9742989957563788, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9854564066904739, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.938338375356983, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9363458435045497, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9275189832478317, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9680610688075657, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9458276502828801, + "sentence_nr": 12 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9555270393882619, + "sentence_nr": 12 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.774972667720128, + "sentence_nr": 12 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9238483556315539, + "sentence_nr": 12 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 12 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9292605756517186, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8710905917506855, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8877998658561537, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9408832971568818, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8758560882945217, + "sentence_nr": 12 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9047504210526172, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9149458726191051, + "sentence_nr": 12 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9497380252636716, + "sentence_nr": 12 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9247145535687903, + "sentence_nr": 12 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 12 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8829314518141973, + "sentence_nr": 12 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9497380252636716, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9369900232316837, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9584772514045287, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9656526051593539, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9262800142753679, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9178799098053634, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8988056403515298, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9240902217687106, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9454713149117651, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9457650793019858, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9115531547253959, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9403725471773088, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9107758326980321, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9251111872988325, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9267004903727016, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9652440580136615, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.924254800539438, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9054967244578502, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.886673201587762, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9082204179924286, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9665046359304257, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9492870842156111, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 12 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9495327576081029, + "sentence_nr": 12 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9605742681789634, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9410712595774171, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.971921146040729, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8360964435901039, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9278436686065653, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9540941235545723, + "sentence_nr": 13 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7765803419515074, + "sentence_nr": 13 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9368660209060221, + "sentence_nr": 13 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9179315685239186, + "sentence_nr": 13 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9198867501155861, + "sentence_nr": 13 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9357668560693397, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.880651835588671, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9322025130978147, + "sentence_nr": 13 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8666701669384438, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9301584319196643, + "sentence_nr": 13 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9201441893603447, + "sentence_nr": 13 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.4518476286184633, + "sentence_nr": 13 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8817151383770689, + "sentence_nr": 13 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9719892276800867, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9232252378020026, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.90340499273861, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9445601279006905, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9284637794790105, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9506720475284802, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9650672132857259, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.935825271074837, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9417006532894496, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9180957642017807, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9336273124319283, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9199623581249377, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9420383150390214, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9169222881606529, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9358954768171188, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9210475526688618, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.900422383617428, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9665042848270522, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9195975724156285, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9482591669689567, + "sentence_nr": 13 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.894400898846725, + "sentence_nr": 13 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9683895601588671, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.974733551222386, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 14 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.935724475087967, + "sentence_nr": 14 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.945278116491169, + "sentence_nr": 14 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.913976993531483, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9206503738833902, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8879551150411227, + "sentence_nr": 14 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9093507960484853, + "sentence_nr": 14 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.44325871778061554, + "sentence_nr": 14 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8629899790604912, + "sentence_nr": 14 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8839868610728687, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9659019608247615, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9612040783142544, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9355702448711621, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8575724679460186, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.919154316989783, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9107041155041439, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8860042875765471, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9163443895096822, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9513360683724416, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9506442510575418, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9210869399305139, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8602965545640948, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8912610518101419, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.857937519719319, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9528771181894694, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9241995664234885, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9432104991415542, + "sentence_nr": 14 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8927784164557715, + "sentence_nr": 14 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8681309346882299, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9045257596276787, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7552111299277484, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.82396628763246, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 15 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8809116426093319, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9069369532463243, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8255413975339149, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9116712045344968, + "sentence_nr": 15 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8855094439275503, + "sentence_nr": 15 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8809116426093319, + "sentence_nr": 15 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5085021700346579, + "sentence_nr": 15 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8155954216287978, + "sentence_nr": 15 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8849766832597384, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9321985099431636, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9158869153954171, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8940299169999223, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9029209331114941, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9434784706316768, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9504499063681887, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8103402263404181, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9033542015144801, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8920851535963175, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9012698346023688, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8815241253287673, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.955434974676454, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9190034267575142, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9028341607528202, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7933760889502307, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9669111778196173, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9183552099282611, + "sentence_nr": 15 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9213964969470535, + "sentence_nr": 15 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9438561056375272, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9245427558640842, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9466217999433078, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8600910973378976, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5465479162881712, + "sentence_nr": 16 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.908088143295894, + "sentence_nr": 16 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8689979953554426, + "sentence_nr": 16 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8837997874830685, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9430526976186369, + "sentence_nr": 16 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7213258253735133, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8583796678495444, + "sentence_nr": 16 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5352913894873965, + "sentence_nr": 16 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7506613813658406, + "sentence_nr": 16 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9120029292560927, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.969258616291086, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9359933426460225, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8446197069920836, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9665537794677691, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7519024768911576, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9419599049218603, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9100379761498075, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9243062555931161, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9232535952320629, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9430158926147498, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8923268998495886, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9316958873367511, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9441083273271286, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.899852954654377, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9451690574618664, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9470556595464068, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8625414653847894, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8658510104009289, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.938651167013012, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9362303281043904, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 16 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9288883358178652, + "sentence_nr": 16 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7378741057437793, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.898904151376881, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 17 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8446522700991944, + "sentence_nr": 17 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9053865214400596, + "sentence_nr": 17 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8700885813654318, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9331139325257429, + "sentence_nr": 17 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8860497305091617, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8222704990602537, + "sentence_nr": 17 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8402559609277754, + "sentence_nr": 17 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7386088026745246, + "sentence_nr": 17 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.06557474419143802, + "sentence_nr": 17 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8700885813654318, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8771568927591851, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8869070241487921, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8173012945645394, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8220012279932035, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8449397341788647, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9239069749524619, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8972504357155736, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6602446784708298, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8667833154965509, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7306831212016971, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7306831212016971, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7406377967705062, + "sentence_nr": 17 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8509760908759664, + "sentence_nr": 17 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.96926930549605, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8641726957145408, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9637804258017773, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9240863542577373, + "sentence_nr": 18 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9450374119495017, + "sentence_nr": 18 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.96926930549605, + "sentence_nr": 18 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9424882191492142, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9424882191492142, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9392663489644577, + "sentence_nr": 18 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8389799674466019, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9253208187778743, + "sentence_nr": 18 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9221577416896909, + "sentence_nr": 18 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9221577416896909, + "sentence_nr": 18 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 18 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9144266092886102, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9500117624130617, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.905862662289465, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9003734503251455, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.858544407149412, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9281598514152588, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.948121913854874, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9629589146416885, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9580736862318411, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9708835294542548, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9234823141384267, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9939521304203686, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9474838221026617, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9488355997601815, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9424390135303181, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9711070259637357, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9237920416869381, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8942780008373756, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8468261925085733, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8976119317111001, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9527352893094178, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9510981354135275, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9521144628004171, + "sentence_nr": 18 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 18 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9774592733638915, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9665042848270522, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9806060444395596, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9049668032095894, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9665042848270522, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9667317239059525, + "sentence_nr": 19 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 19 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9159800198090925, + "sentence_nr": 19 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9667317239059525, + "sentence_nr": 19 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8925738398388144, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9058585844143391, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8888787903169728, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8925738398388144, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9128855680689195, + "sentence_nr": 19 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9272821491047395, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9413354408985303, + "sentence_nr": 19 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.46619006556188114, + "sentence_nr": 19 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.913896513382741, + "sentence_nr": 19 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9058585844143391, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9557922260754473, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9391656780027514, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9260113686541587, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9419307613884336, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9845996986850503, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9255228522887315, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.889174440461237, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9496761617043387, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9322360743819351, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.935492418630274, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9456325305487512, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9325466173278317, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9240800356922247, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9361690788124847, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.938043640398588, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.901373116210745, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9531605377803356, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9132591460407243, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9494481589794223, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9415361564397403, + "sentence_nr": 19 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.897450557161678, + "sentence_nr": 19 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.937002127196651, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9435408381256087, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9421449698305296, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9607456319189528, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5467617051776391, + "sentence_nr": 20 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8969209805167669, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9060555921929084, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8969209805167669, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9096430262961498, + "sentence_nr": 20 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7983940190154283, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9132591460407243, + "sentence_nr": 20 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9204057102575467, + "sentence_nr": 20 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.4968312722246179, + "sentence_nr": 20 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8001971757912975, + "sentence_nr": 20 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9204057102575467, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.95112146871187, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.949624286506194, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9588139991437585, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9236414681715879, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9453633691396565, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9278367059866518, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9302237306555959, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8441460025255829, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9470556595464068, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.951863030034636, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8944443568631728, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9082204179924286, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.90717359411325, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9014597856352894, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9250084453288043, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.95462554022758, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9318340131711181, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9736147802901586, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9182449217144187, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9400180064454685, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9255769217104873, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9309426923102619, + "sentence_nr": 20 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9002012094811458, + "sentence_nr": 20 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9690017425712892, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6924365679057801, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.804543317337012, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8385395593542468, + "sentence_nr": 21 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9515560914045473, + "sentence_nr": 21 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.854435717190483, + "sentence_nr": 21 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7570244995532351, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6676892344393273, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.873135905690596, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6885773376269438, + "sentence_nr": 21 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.46961217063286037, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8379214027434272, + "sentence_nr": 21 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7570244995532351, + "sentence_nr": 21 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.20981645725460496, + "sentence_nr": 21 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6659995521111991, + "sentence_nr": 21 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7570244995532351, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8388678282825207, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9180596829241628, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9166274634412449, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8626786769008709, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7991709881281639, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8119656541607598, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8872308158649556, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8914910756561332, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.927494511055529, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9528614248210486, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8523282278495175, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9297633204435644, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9278042759794851, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8998995790099074, + "sentence_nr": 21 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9302677881301988, + "sentence_nr": 21 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9522511234396616, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7585159184184324, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8922770448230282, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9126128133576369, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6245412677586388, + "sentence_nr": 22 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.868233862673363, + "sentence_nr": 22 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8852329532489643, + "sentence_nr": 22 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8378994642516495, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8775848642818888, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8493237569441244, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9448292727000915, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8555426729178464, + "sentence_nr": 22 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7558344174949267, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8497451239178159, + "sentence_nr": 22 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8493237569441244, + "sentence_nr": 22 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8199763712080639, + "sentence_nr": 22 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 22 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8206722459046871, + "sentence_nr": 22 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.883570112979728, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.914786293186172, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8845568645036501, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8937192042814042, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.901348698020278, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8382013802825361, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9219786709510569, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8767649499531999, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9094880423990607, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8719390074611821, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9349020382990011, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9272997117562144, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8962185446474815, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8930034245249151, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9271664513693498, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8936606750264663, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8803360259381345, + "sentence_nr": 22 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8680210960657176, + "sentence_nr": 22 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9699436870249787, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9699436870249787, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7931982206364059, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9699436870249787, + "sentence_nr": 23 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9438398456065387, + "sentence_nr": 23 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9281186022380125, + "sentence_nr": 23 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9699436870249787, + "sentence_nr": 23 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9184823166209557, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8839868610728687, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8884834862973964, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9300073119656489, + "sentence_nr": 23 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9381606131991436, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8839868610728687, + "sentence_nr": 23 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9300073119656489, + "sentence_nr": 23 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.4072337657555589, + "sentence_nr": 23 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9184823166209557, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9293646790023864, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9511392272878579, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9293879632586071, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9277950353049101, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8843378183459343, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8741633139531418, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9271525909282003, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9736840552120738, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9396084767892234, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9187563342696414, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8896752045577786, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9737097349915758, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9330058893011377, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9570066548501687, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9201684039669155, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9133901345922595, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9458636432813123, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.917857433142856, + "sentence_nr": 23 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9375412439691305, + "sentence_nr": 23 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9556267474396976, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9489054429933926, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9489054429933926, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8327628422929998, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9249365863966041, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.922528755167094, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9486938895906879, + "sentence_nr": 24 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 24 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8620685016584069, + "sentence_nr": 24 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9405916043682414, + "sentence_nr": 24 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9327915990783561, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.909738029095061, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.909738029095061, + "sentence_nr": 24 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8707492337114523, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.95883735444933, + "sentence_nr": 24 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9455007606735264, + "sentence_nr": 24 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9372630850025364, + "sentence_nr": 24 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5983897920478856, + "sentence_nr": 24 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9299762198228243, + "sentence_nr": 24 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9302303599426779, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9544609413449265, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9355306533611718, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9432457481338326, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9472285181144658, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.923828763793418, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9224761498105726, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9756278595118478, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9499594621802195, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9544238060448419, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9012364553153411, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8199585012210312, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9280048312907723, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9587462450914201, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8938919301593574, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9507758066685948, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9432005035367906, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9675203656708941, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9303385434730891, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9401106918306472, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9533532275954528, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9274629860503822, + "sentence_nr": 24 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8926908826740254, + "sentence_nr": 24 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6224897798032885, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7893575827661004, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9478696521177714, + "sentence_nr": 25 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7160421907140165, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6217685026572488, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.794919886900137, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8793006100154936, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5916523997385489, + "sentence_nr": 25 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.4849269488253923, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7462718113811923, + "sentence_nr": 25 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8083701726292805, + "sentence_nr": 25 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.722502153449955, + "sentence_nr": 25 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 25 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.5544920599877754, + "sentence_nr": 25 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6853792233736985, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9200538056807258, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9630774769374594, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9143443086107108, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9052744049140443, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9018850910676268, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9142574363760879, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9168431011517528, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9141901633008906, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9317477810881586, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9354759108346813, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9141453314674155, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9550191440621234, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8402328635525613, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.831845583109951, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9092382099397807, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9770044719642067, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9155318202784664, + "sentence_nr": 25 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 25 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8272309965382391, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7767725512278205, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9373981486656514, + "sentence_nr": 26 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9579023880929557, + "sentence_nr": 26 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9268329536813669, + "sentence_nr": 26 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.904428807825769, + "sentence_nr": 26 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9116613044583819, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9084279839455062, + "sentence_nr": 26 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8013174743750245, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.34811585804131506, + "sentence_nr": 26 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8912610518101419, + "sentence_nr": 26 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9084279608664247, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9342971539350323, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9618018909441389, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9221850850049388, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9621502301102783, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9207497282487874, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8817316559043479, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9595521389704431, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9604273088099046, + "sentence_nr": 26 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8832167531630292, + "sentence_nr": 26 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9618018909441389, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9338423795983638, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8069582822584229, + "sentence_nr": 27 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9432051372011929, + "sentence_nr": 27 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8875472267363329, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8538919155402751, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8601111478550084, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8274840531521687, + "sentence_nr": 27 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8737243337458652, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8875472267363329, + "sentence_nr": 27 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8213297311895551, + "sentence_nr": 27 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.3007622907436899, + "sentence_nr": 27 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.906379768806771, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8996352283472103, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8577239523880982, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9705288278234159, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9022302698191352, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9618116705103616, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9282902444420971, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9283062281157928, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9143841728614055, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9148205155364358, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9429357495928096, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8199038085123204, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9007500710615358, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9335504867261654, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8519148326217993, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9467340802817513, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8914166352994622, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8697448206881571, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9091527400737927, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9487286082082608, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9550331732946552, + "sentence_nr": 27 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 27 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9703747509928279, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9540941235545723, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9543144589160125, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.652649628941592, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9767775472269087, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9393628940364738, + "sentence_nr": 28 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9410712595774171, + "sentence_nr": 28 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.6444379795256558, + "sentence_nr": 28 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8964898605551818, + "sentence_nr": 28 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9263597385884417, + "sentence_nr": 28 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.899546929868499, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.899546929868499, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9256238040654331, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9256238040654331, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 28 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.824741266541094, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 28 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9256238040654331, + "sentence_nr": 28 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8283905649271065, + "sentence_nr": 28 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.0, + "sentence_nr": 28 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.881413837458117, + "sentence_nr": 28 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.899546929868499, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9437940294094723, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9269703177791706, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.944904344834561, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8479413107328494, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9715595760527852, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8595969327963556, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9538713542813556, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8348508116391393, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9572462820044535, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9375119517314923, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9493167367596885, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9344916654109876, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9849529115133767, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9275259780895282, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9276874028790393, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9473074618830379, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9460494618521745, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8923268998495886, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9455007606735264, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9526558782357073, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9338345156544289, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8995764072227389, + "sentence_nr": 28 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9505226544098013, + "sentence_nr": 28 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9630841609539229, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9451142647196181, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7510122845400926, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8090165300577936, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9543128468386116, + "sentence_nr": 29 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "en", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "openai/gpt-4o-mini", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.920197561569537, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.1-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8759929746436435, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3-70b-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-small-24b-instruct-2501", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8935424392990651, + "sentence_nr": 29 + }, + { + "model": "mistralai/mistral-nemo", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.7769676399488106, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8875472267363329, + "sentence_nr": 29 + }, + { + "model": "google/gemini-2.0-flash-lite-001", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "google/gemma-3-27b-it", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "qwen/qwq-32b", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8664932988313133, + "sentence_nr": 29 + }, + { + "model": "microsoft/phi-4-multimodal-instruct", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.906379768806771, + "sentence_nr": 29 + }, + { + "model": "amazon/nova-micro-v1", + "bcp_47": "zh", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8001297194719582, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "hi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9048724843551281, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "es", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8947987168857687, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ar", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9489238765618674, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ur", + "task": "language_modeling", + "metric": "chrf", + "score": 0.945278116491169, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "bn", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pt", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "pa", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8989194854163256, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ru", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "sw", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9061728639858796, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "id", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9501419212325259, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "de", + "task": "language_modeling", + "metric": "chrf", + "score": 0.891206254843651, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ja", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9331628274049639, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "te", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "mr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "jv", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9046319474149982, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "vi", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9463095328863311, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ta", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9085828484030862, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fa", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "tr", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "yue", + "task": "language_modeling", + "metric": "chrf", + "score": 0.8856061163721227, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "ko", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "it", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "fil", + "task": "language_modeling", + "metric": "chrf", + "score": 0.9661878700572512, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "arz", + "task": "language_modeling", + "metric": "chrf", + "score": 1.0, + "sentence_nr": 29 + }, + { + "model": "meta-llama/llama-3.3-70b-instruct", + "bcp_47": "gu", + "task": "language_modeling", + "metric": "chrf", + "score": 0.908669313428767, + "sentence_nr": 29 + } ] \ No newline at end of file